diff options
Diffstat (limited to 'net')
157 files changed, 5582 insertions, 1073 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 2a7f1b15714a..407b2335f091 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -702,20 +702,7 @@ static int vlan_ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) { const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); - const struct ethtool_ops *ops = vlan->real_dev->ethtool_ops; - struct phy_device *phydev = vlan->real_dev->phydev; - - if (phy_has_tsinfo(phydev)) { - return phy_ts_info(phydev, info); - } else if (ops->get_ts_info) { - return ops->get_ts_info(vlan->real_dev, info); - } else { - info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | - SOF_TIMESTAMPING_SOFTWARE; - info->phc_index = -1; - } - - return 0; + return ethtool_get_ts_info_by_layer(vlan->real_dev, info); } static void vlan_dev_get_stats64(struct net_device *dev, diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 3bd0760c76a2..b51d8b071b56 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -20,6 +20,7 @@ batman-adv-y += hash.o batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o batman-adv-y += main.o batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o +batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast_forw.o batman-adv-y += netlink.o batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o batman-adv-y += originator.o diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 37ce6cfb3520..5f46ca3d4bb8 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -20,7 +20,6 @@ #include <linux/if_vlan.h> #include <linux/jhash.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> @@ -31,6 +30,7 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/sprintf.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/workqueue.h> diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index c120c7c6d25f..757c084ac2d1 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -25,7 +25,6 @@ #include "hard-interface.h" #include "originator.h" -#include "routing.h" #include "send.h" /** @@ -351,18 +350,14 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, struct batadv_orig_node *orig_node_src) { struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); - struct batadv_orig_node *orig_node_dst; struct batadv_neigh_node *neigh_node = NULL; struct batadv_frag_packet *packet; u16 total_size; bool ret = false; packet = (struct batadv_frag_packet *)skb->data; - orig_node_dst = batadv_orig_hash_find(bat_priv, packet->dest); - if (!orig_node_dst) - goto out; - neigh_node = batadv_find_router(bat_priv, orig_node_dst, recv_if); + neigh_node = batadv_orig_to_router(bat_priv, packet->dest, recv_if); if (!neigh_node) goto out; @@ -381,7 +376,6 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, } out: - batadv_orig_node_put(orig_node_dst); batadv_neigh_node_put(neigh_node); return ret; } diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index d26124bc27e1..0ddd8b4b3f4c 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -18,7 +18,6 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> -#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> @@ -29,6 +28,7 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/sprintf.h> #include <linux/stddef.h> #include <linux/udp.h> #include <net/sock.h> diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index e8a449915566..5fc754b0b3f7 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -6,6 +6,7 @@ #include "main.h" +#include <linux/array_size.h> #include <linux/atomic.h> #include <linux/build_bug.h> #include <linux/byteorder/generic.h> @@ -20,7 +21,6 @@ #include <linux/init.h> #include <linux/ip.h> #include <linux/ipv6.h> -#include <linux/kernel.h> #include <linux/kobject.h> #include <linux/kref.h> #include <linux/list.h> @@ -33,6 +33,7 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/sprintf.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/workqueue.h> @@ -532,6 +533,8 @@ static void batadv_recv_handler_init(void) /* broadcast packet */ batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet; + /* multicast packet */ + batadv_rx_handler[BATADV_MCAST] = batadv_recv_mcast_packet; /* unicast packets ... */ /* unicast with 4 addresses packet */ diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 10007c5894a1..870dcd7f1786 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2023.3" +#define BATADV_SOURCE_VERSION "2024.0" #endif /* B.A.T.M.A.N. parameters */ diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 315394f12c55..d982daea8329 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -25,7 +25,6 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/jiffies.h> -#include <linux/kernel.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> @@ -36,6 +35,7 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/sprintf.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> @@ -236,6 +236,37 @@ static u8 batadv_mcast_mla_rtr_flags_get(struct batadv_priv *bat_priv, } /** + * batadv_mcast_mla_forw_flags_get() - get multicast forwarding flags + * @bat_priv: the bat priv with all the soft interface information + * + * Checks if all active hard interfaces have an MTU larger or equal to 1280 + * bytes (IPv6 minimum MTU). + * + * Return: BATADV_MCAST_HAVE_MC_PTYPE_CAPA if yes, BATADV_NO_FLAGS otherwise. + */ +static u8 batadv_mcast_mla_forw_flags_get(struct batadv_priv *bat_priv) +{ + const struct batadv_hard_iface *hard_iface; + + rcu_read_lock(); + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->if_status != BATADV_IF_ACTIVE) + continue; + + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + + if (hard_iface->net_dev->mtu < IPV6_MIN_MTU) { + rcu_read_unlock(); + return BATADV_NO_FLAGS; + } + } + rcu_read_unlock(); + + return BATADV_MCAST_HAVE_MC_PTYPE_CAPA; +} + +/** * batadv_mcast_mla_flags_get() - get the new multicast flags * @bat_priv: the bat priv with all the soft interface information * @@ -256,6 +287,7 @@ batadv_mcast_mla_flags_get(struct batadv_priv *bat_priv) mla_flags.enabled = 1; mla_flags.tvlv_flags |= batadv_mcast_mla_rtr_flags_get(bat_priv, bridge); + mla_flags.tvlv_flags |= batadv_mcast_mla_forw_flags_get(bat_priv); if (!bridge) return mla_flags; @@ -806,23 +838,25 @@ static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags) { bool old_enabled = bat_priv->mcast.mla_flags.enabled; u8 old_flags = bat_priv->mcast.mla_flags.tvlv_flags; - char str_old_flags[] = "[.... . ]"; + char str_old_flags[] = "[.... . .]"; - sprintf(str_old_flags, "[%c%c%c%s%s]", + sprintf(str_old_flags, "[%c%c%c%s%s%c]", (old_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.', (old_flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.', (old_flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.', !(old_flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ", - !(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". "); + !(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ", + !(old_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) ? 'P' : '.'); batadv_dbg(BATADV_DBG_MCAST, bat_priv, - "Changing multicast flags from '%s' to '[%c%c%c%s%s]'\n", + "Changing multicast flags from '%s' to '[%c%c%c%s%s%c]'\n", old_enabled ? str_old_flags : "<undefined>", (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.', (flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.', (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.', !(flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ", - !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". "); + !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ", + !(flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) ? 'P' : '.'); } /** @@ -1136,16 +1170,61 @@ static int batadv_mcast_forw_rtr_count(struct batadv_priv *bat_priv, } /** + * batadv_mcast_forw_mode_by_count() - get forwarding mode by count + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to check + * @vid: the vlan identifier + * @is_routable: stores whether the destination is routable + * @count: the number of originators the multicast packet need to be sent to + * + * For a multicast packet with multiple destination originators, checks which + * mode to use. For BATADV_FORW_MCAST it also encapsulates the packet with a + * complete batman-adv multicast header. + * + * Return: + * BATADV_FORW_MCAST: If all nodes have multicast packet routing + * capabilities and an MTU >= 1280 on all hard interfaces (including us) + * and the encapsulated multicast packet with all destination addresses + * would still fit into an 1280 bytes batman-adv multicast packet + * (excluding the outer ethernet frame) and we could successfully push + * the full batman-adv multicast packet header. + * BATADV_FORW_UCASTS: If the packet cannot be sent in a batman-adv + * multicast packet and the amount of batman-adv unicast packets needed + * is smaller or equal to the configured multicast fanout. + * BATADV_FORW_BCAST: Otherwise. + */ +static enum batadv_forw_mode +batadv_mcast_forw_mode_by_count(struct batadv_priv *bat_priv, + struct sk_buff *skb, unsigned short vid, + int is_routable, int count) +{ + unsigned int mcast_hdrlen = batadv_mcast_forw_packet_hdrlen(count); + u8 own_tvlv_flags = bat_priv->mcast.mla_flags.tvlv_flags; + + if (!atomic_read(&bat_priv->mcast.num_no_mc_ptype_capa) && + own_tvlv_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA && + skb->len + mcast_hdrlen <= IPV6_MIN_MTU && + batadv_mcast_forw_push(bat_priv, skb, vid, is_routable, count)) + return BATADV_FORW_MCAST; + + if (count <= atomic_read(&bat_priv->multicast_fanout)) + return BATADV_FORW_UCASTS; + + return BATADV_FORW_BCAST; +} + +/** * batadv_mcast_forw_mode() - check on how to forward a multicast packet * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to check + * @vid: the vlan identifier * @is_routable: stores whether the destination is routable * * Return: The forwarding mode as enum batadv_forw_mode. */ enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - int *is_routable) + unsigned short vid, int *is_routable) { int ret, tt_count, ip_count, unsnoop_count, total_count; bool is_unsnoopable = false; @@ -1175,10 +1254,8 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, else if (unsnoop_count) return BATADV_FORW_BCAST; - if (total_count <= atomic_read(&bat_priv->multicast_fanout)) - return BATADV_FORW_UCASTS; - - return BATADV_FORW_BCAST; + return batadv_mcast_forw_mode_by_count(bat_priv, skb, vid, *is_routable, + total_count); } /** @@ -1739,6 +1816,31 @@ static void batadv_mcast_want_rtr6_update(struct batadv_priv *bat_priv, } /** + * batadv_mcast_have_mc_ptype_update() - update multicast packet type counter + * @bat_priv: the bat priv with all the soft interface information + * @orig: the orig_node which multicast state might have changed of + * @mcast_flags: flags indicating the new multicast state + * + * If the BATADV_MCAST_HAVE_MC_PTYPE_CAPA flag of this originator, orig, has + * toggled then this method updates the counter accordingly. + */ +static void batadv_mcast_have_mc_ptype_update(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + u8 mcast_flags) +{ + lockdep_assert_held(&orig->mcast_handler_lock); + + /* switched from flag set to unset */ + if (!(mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) && + orig->mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) + atomic_inc(&bat_priv->mcast.num_no_mc_ptype_capa); + /* switched from flag unset to set */ + else if (mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA && + !(orig->mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA)) + atomic_dec(&bat_priv->mcast.num_no_mc_ptype_capa); +} + +/** * batadv_mcast_tvlv_flags_get() - get multicast flags from an OGM TVLV * @enabled: whether the originator has multicast TVLV support enabled * @tvlv_value: tvlv buffer containing the multicast flags @@ -1806,6 +1908,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv, batadv_mcast_want_ipv6_update(bat_priv, orig, mcast_flags); batadv_mcast_want_rtr4_update(bat_priv, orig, mcast_flags); batadv_mcast_want_rtr6_update(bat_priv, orig, mcast_flags); + batadv_mcast_have_mc_ptype_update(bat_priv, orig, mcast_flags); orig->mcast_flags = mcast_flags; spin_unlock_bh(&orig->mcast_handler_lock); @@ -1820,6 +1923,10 @@ void batadv_mcast_init(struct batadv_priv *bat_priv) batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler, NULL, NULL, BATADV_TVLV_MCAST, 2, BATADV_TVLV_HANDLER_OGM_CIFNOTFND); + batadv_tvlv_handler_register(bat_priv, NULL, NULL, + batadv_mcast_forw_tracker_tvlv_handler, + BATADV_TVLV_MCAST_TRACKER, 1, + BATADV_TVLV_HANDLER_OGM_CIFNOTFND); INIT_DELAYED_WORK(&bat_priv->mcast.work, batadv_mcast_mla_update); batadv_mcast_start_timer(bat_priv); diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index a9770d8d6d36..d97ee51d26f2 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -11,6 +11,7 @@ #include <linux/netlink.h> #include <linux/skbuff.h> +#include <linux/types.h> /** * enum batadv_forw_mode - the way a packet should be forwarded as @@ -28,6 +29,12 @@ enum batadv_forw_mode { */ BATADV_FORW_UCASTS, + /** + * @BATADV_FORW_MCAST: forward the packet to some nodes via a + * batman-adv multicast packet + */ + BATADV_FORW_MCAST, + /** @BATADV_FORW_NONE: don't forward, drop it */ BATADV_FORW_NONE, }; @@ -36,7 +43,7 @@ enum batadv_forw_mode { enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - int *is_routable); + unsigned short vid, int *is_routable); int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid, int is_routable); @@ -52,11 +59,23 @@ void batadv_mcast_free(struct batadv_priv *bat_priv); void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node); +/* multicast_forw.c */ + +int batadv_mcast_forw_tracker_tvlv_handler(struct batadv_priv *bat_priv, + struct sk_buff *skb); + +unsigned int batadv_mcast_forw_packet_hdrlen(unsigned int num_dests); + +bool batadv_mcast_forw_push(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, int is_routable, int count); + +int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv, struct sk_buff *skb); + #else static inline enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - int *is_routable) + unsigned short vid, int *is_routable) { return BATADV_FORW_BCAST; } @@ -94,6 +113,13 @@ static inline void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node) { } +static inline int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + kfree_skb(skb); + return NET_XMIT_DROP; +} + #endif /* CONFIG_BATMAN_ADV_MCAST */ #endif /* _NET_BATMAN_ADV_MULTICAST_H_ */ diff --git a/net/batman-adv/multicast_forw.c b/net/batman-adv/multicast_forw.c new file mode 100644 index 000000000000..fafd6ba8c056 --- /dev/null +++ b/net/batman-adv/multicast_forw.c @@ -0,0 +1,1178 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) B.A.T.M.A.N. contributors: + * + * Linus Lüssing + */ + +#include "multicast.h" +#include "main.h" + +#include <linux/bug.h> +#include <linux/build_bug.h> +#include <linux/byteorder/generic.h> +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/etherdevice.h> +#include <linux/gfp.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/ipv6.h> +#include <linux/limits.h> +#include <linux/netdevice.h> +#include <linux/rculist.h> +#include <linux/rcupdate.h> +#include <linux/skbuff.h> +#include <linux/stddef.h> +#include <linux/string.h> +#include <linux/types.h> +#include <uapi/linux/batadv_packet.h> + +#include "bridge_loop_avoidance.h" +#include "originator.h" +#include "send.h" +#include "translation-table.h" + +#define batadv_mcast_forw_tracker_for_each_dest(dest, num_dests) \ + for (; num_dests; num_dests--, (dest) += ETH_ALEN) + +#define batadv_mcast_forw_tracker_for_each_dest2(dest1, dest2, num_dests) \ + for (; num_dests; num_dests--, (dest1) += ETH_ALEN, (dest2) += ETH_ALEN) + +/** + * batadv_mcast_forw_skb_push() - skb_push and memorize amount of pushed bytes + * @skb: the skb to push onto + * @size: the amount of bytes to push + * @len: stores the total amount of bytes pushed + * + * Performs an skb_push() onto the given skb and adds the amount of pushed bytes + * to the given len pointer. + * + * Return: the return value of the skb_push() call. + */ +static void *batadv_mcast_forw_skb_push(struct sk_buff *skb, size_t size, + unsigned short *len) +{ + *len += size; + return skb_push(skb, size); +} + +/** + * batadv_mcast_forw_push_padding() - push 2 padding bytes to skb's front + * @skb: the skb to push onto + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Pushes two padding bytes to the front of the given skb. + * + * Return: On success a pointer to the first byte of the two pushed padding + * bytes within the skb. NULL otherwise. + */ +static char * +batadv_mcast_forw_push_padding(struct sk_buff *skb, unsigned short *tvlv_len) +{ + const int pad_len = 2; + char *padding; + + if (skb_headroom(skb) < pad_len) + return NULL; + + padding = batadv_mcast_forw_skb_push(skb, pad_len, tvlv_len); + memset(padding, 0, pad_len); + + return padding; +} + +/** + * batadv_mcast_forw_push_est_padding() - push padding bytes if necessary + * @skb: the skb to potentially push the padding onto + * @count: the (estimated) number of originators the multicast packet needs to + * be sent to + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * If the number of destination entries is even then this adds two + * padding bytes to the end of the tracker TVLV. + * + * Return: true on success or if no padding is needed, false otherwise. + */ +static bool +batadv_mcast_forw_push_est_padding(struct sk_buff *skb, int count, + unsigned short *tvlv_len) +{ + if (!(count % 2) && !batadv_mcast_forw_push_padding(skb, tvlv_len)) + return false; + + return true; +} + +/** + * batadv_mcast_forw_orig_entry() - get orig_node from an hlist node + * @node: the hlist node to get the orig_node from + * @entry_offset: the offset of the hlist node within the orig_node struct + * + * Return: The orig_node containing the hlist node on success, NULL on error. + */ +static struct batadv_orig_node * +batadv_mcast_forw_orig_entry(struct hlist_node *node, + size_t entry_offset) +{ + /* sanity check */ + switch (entry_offset) { + case offsetof(struct batadv_orig_node, mcast_want_all_ipv4_node): + case offsetof(struct batadv_orig_node, mcast_want_all_ipv6_node): + case offsetof(struct batadv_orig_node, mcast_want_all_rtr4_node): + case offsetof(struct batadv_orig_node, mcast_want_all_rtr6_node): + break; + default: + WARN_ON(1); + return NULL; + } + + return (struct batadv_orig_node *)((void *)node - entry_offset); +} + +/** + * batadv_mcast_forw_push_dest() - push an originator MAC address onto an skb + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination address onto + * @vid: the vlan identifier + * @orig_node: the originator node to get the MAC address from + * @num_dests: a pointer to store the number of pushed addresses in + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * If the orig_node is a BLA backbone gateway, if there is not enough skb + * headroom available or if num_dests is already at its maximum (65535) then + * neither the skb nor num_dests is changed. Otherwise the originator's MAC + * address is pushed onto the given skb and num_dests incremented by one. + * + * Return: true if the orig_node is a backbone gateway or if an orig address + * was pushed successfully, false otherwise. + */ +static bool batadv_mcast_forw_push_dest(struct batadv_priv *bat_priv, + struct sk_buff *skb, unsigned short vid, + struct batadv_orig_node *orig_node, + unsigned short *num_dests, + unsigned short *tvlv_len) +{ + BUILD_BUG_ON(sizeof_field(struct batadv_tvlv_mcast_tracker, num_dests) + != sizeof(__be16)); + + /* Avoid sending to other BLA gateways - they already got the frame from + * the LAN side we share with them. + * TODO: Refactor to take BLA into account earlier in mode check. + */ + if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid)) + return true; + + if (skb_headroom(skb) < ETH_ALEN || *num_dests == U16_MAX) + return false; + + batadv_mcast_forw_skb_push(skb, ETH_ALEN, tvlv_len); + ether_addr_copy(skb->data, orig_node->orig); + (*num_dests)++; + + return true; +} + +/** + * batadv_mcast_forw_push_dests_list() - push originators from list onto an skb + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination addresses onto + * @vid: the vlan identifier + * @head: the list to gather originators from + * @entry_offset: offset of an hlist node in an orig_node structure + * @num_dests: a pointer to store the number of pushed addresses in + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Push the MAC addresses of all originators in the given list onto the given + * skb. + * + * Return: true on success, false otherwise. + */ +static int batadv_mcast_forw_push_dests_list(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + struct hlist_head *head, + size_t entry_offset, + unsigned short *num_dests, + unsigned short *tvlv_len) +{ + struct hlist_node *node; + struct batadv_orig_node *orig_node; + + rcu_read_lock(); + __hlist_for_each_rcu(node, head) { + orig_node = batadv_mcast_forw_orig_entry(node, entry_offset); + if (!orig_node || + !batadv_mcast_forw_push_dest(bat_priv, skb, vid, orig_node, + num_dests, tvlv_len)) { + rcu_read_unlock(); + return false; + } + } + rcu_read_unlock(); + + return true; +} + +/** + * batadv_mcast_forw_push_tt() - push originators with interest through TT + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination addresses onto + * @vid: the vlan identifier + * @num_dests: a pointer to store the number of pushed addresses in + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Push the MAC addresses of all originators which have indicated interest in + * this multicast packet through the translation table onto the given skb. + * + * Return: true on success, false otherwise. + */ +static bool +batadv_mcast_forw_push_tt(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, unsigned short *num_dests, + unsigned short *tvlv_len) +{ + struct batadv_tt_orig_list_entry *orig_entry; + + struct batadv_tt_global_entry *tt_global; + const u8 *addr = eth_hdr(skb)->h_dest; + + /* ok */ + int ret = true; + + tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid); + if (!tt_global) + goto out; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_entry, &tt_global->orig_list, list) { + if (!batadv_mcast_forw_push_dest(bat_priv, skb, vid, + orig_entry->orig_node, + num_dests, tvlv_len)) { + ret = false; + break; + } + } + rcu_read_unlock(); + + batadv_tt_global_entry_put(tt_global); + +out: + return ret; +} + +/** + * batadv_mcast_forw_push_want_all() - push originators with want-all flag + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination addresses onto + * @vid: the vlan identifier + * @num_dests: a pointer to store the number of pushed addresses in + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Push the MAC addresses of all originators which have indicated interest in + * this multicast packet through the want-all flag onto the given skb. + * + * Return: true on success, false otherwise. + */ +static bool batadv_mcast_forw_push_want_all(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + unsigned short *num_dests, + unsigned short *tvlv_len) +{ + struct hlist_head *head = NULL; + size_t offset; + int ret; + + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + head = &bat_priv->mcast.want_all_ipv4_list; + offset = offsetof(struct batadv_orig_node, + mcast_want_all_ipv4_node); + break; + case htons(ETH_P_IPV6): + head = &bat_priv->mcast.want_all_ipv6_list; + offset = offsetof(struct batadv_orig_node, + mcast_want_all_ipv6_node); + break; + default: + return false; + } + + ret = batadv_mcast_forw_push_dests_list(bat_priv, skb, vid, head, + offset, num_dests, tvlv_len); + if (!ret) + return false; + + return true; +} + +/** + * batadv_mcast_forw_push_want_rtr() - push originators with want-router flag + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination addresses onto + * @vid: the vlan identifier + * @num_dests: a pointer to store the number of pushed addresses in + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Push the MAC addresses of all originators which have indicated interest in + * this multicast packet through the want-all-rtr flag onto the given skb. + * + * Return: true on success, false otherwise. + */ +static bool batadv_mcast_forw_push_want_rtr(struct batadv_priv *bat_priv, + struct sk_buff *skb, + unsigned short vid, + unsigned short *num_dests, + unsigned short *tvlv_len) +{ + struct hlist_head *head = NULL; + size_t offset; + int ret; + + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + head = &bat_priv->mcast.want_all_rtr4_list; + offset = offsetof(struct batadv_orig_node, + mcast_want_all_rtr4_node); + break; + case htons(ETH_P_IPV6): + head = &bat_priv->mcast.want_all_rtr6_list; + offset = offsetof(struct batadv_orig_node, + mcast_want_all_rtr6_node); + break; + default: + return false; + } + + ret = batadv_mcast_forw_push_dests_list(bat_priv, skb, vid, head, + offset, num_dests, tvlv_len); + if (!ret) + return false; + + return true; +} + +/** + * batadv_mcast_forw_scrape() - remove bytes within skb data + * @skb: the skb to remove bytes from + * @offset: the offset from the skb data from which to scrape + * @len: the amount of bytes to scrape starting from the offset + * + * Scrapes/removes len bytes from the given skb at the given offset from the + * skb data. + * + * Caller needs to ensure that the region from the skb data's start up + * to/including the to be removed bytes are linearized. + */ +static void batadv_mcast_forw_scrape(struct sk_buff *skb, + unsigned short offset, + unsigned short len) +{ + char *to, *from; + + SKB_LINEAR_ASSERT(skb); + + to = skb_pull(skb, len); + from = to - len; + + memmove(to, from, offset); +} + +/** + * batadv_mcast_forw_push_scrape_padding() - remove TVLV padding + * @skb: the skb to potentially adjust the TVLV's padding on + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Remove two padding bytes from the end of the multicast tracker TVLV, + * from before the payload data. + * + * Caller needs to ensure that the TVLV bytes are linearized. + */ +static void batadv_mcast_forw_push_scrape_padding(struct sk_buff *skb, + unsigned short *tvlv_len) +{ + const int pad_len = 2; + + batadv_mcast_forw_scrape(skb, *tvlv_len - pad_len, pad_len); + *tvlv_len -= pad_len; +} + +/** + * batadv_mcast_forw_push_insert_padding() - insert TVLV padding + * @skb: the skb to potentially adjust the TVLV's padding on + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Inserts two padding bytes at the end of the multicast tracker TVLV, + * before the payload data in the given skb. + * + * Return: true on success, false otherwise. + */ +static bool batadv_mcast_forw_push_insert_padding(struct sk_buff *skb, + unsigned short *tvlv_len) +{ + unsigned short offset = *tvlv_len; + char *to, *from = skb->data; + + to = batadv_mcast_forw_push_padding(skb, tvlv_len); + if (!to) + return false; + + memmove(to, from, offset); + memset(to + offset, 0, *tvlv_len - offset); + return true; +} + +/** + * batadv_mcast_forw_push_adjust_padding() - adjust padding if necessary + * @skb: the skb to potentially adjust the TVLV's padding on + * @count: the estimated number of originators the multicast packet needs to + * be sent to + * @num_dests_pushed: the number of originators that were actually added to the + * multicast packet's tracker TVLV + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Adjusts the padding in the multicast packet's tracker TVLV depending on the + * initially estimated amount of destinations versus the amount of destinations + * that were actually added to the tracker TVLV. + * + * If the initial estimate was correct or at least the oddness was the same then + * no padding adjustment is performed. + * If the initially estimated number was even, so padding was initially added, + * but it turned out to be odd then padding is removed. + * If the initially estimated number was odd, so no padding was initially added, + * but it turned out to be even then padding is added. + * + * Return: true if no padding adjustment is needed or the adjustment was + * successful, false otherwise. + */ +static bool +batadv_mcast_forw_push_adjust_padding(struct sk_buff *skb, int *count, + unsigned short num_dests_pushed, + unsigned short *tvlv_len) +{ + int ret = true; + + if (likely((num_dests_pushed % 2) == (*count % 2))) + goto out; + + /** + * estimated even number of destinations, but turned out to be odd + * -> remove padding + */ + if (!(*count % 2) && (num_dests_pushed % 2)) + batadv_mcast_forw_push_scrape_padding(skb, tvlv_len); + /** + * estimated odd number of destinations, but turned out to be even + * -> add padding + */ + else if ((*count % 2) && (!(num_dests_pushed % 2))) + ret = batadv_mcast_forw_push_insert_padding(skb, tvlv_len); + +out: + *count = num_dests_pushed; + return ret; +} + +/** + * batadv_mcast_forw_push_dests() - push originator addresses onto an skb + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the destination addresses onto + * @vid: the vlan identifier + * @is_routable: indicates whether the destination is routable + * @count: the number of originators the multicast packet needs to be sent to + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Push the MAC addresses of all originators which have indicated interest in + * this multicast packet onto the given skb. + * + * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on + * success 0. + */ +static int +batadv_mcast_forw_push_dests(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, int is_routable, int *count, + unsigned short *tvlv_len) +{ + unsigned short num_dests = 0; + + if (!batadv_mcast_forw_push_est_padding(skb, *count, tvlv_len)) + goto err; + + if (!batadv_mcast_forw_push_tt(bat_priv, skb, vid, &num_dests, + tvlv_len)) + goto err; + + if (!batadv_mcast_forw_push_want_all(bat_priv, skb, vid, &num_dests, + tvlv_len)) + goto err; + + if (is_routable && + !batadv_mcast_forw_push_want_rtr(bat_priv, skb, vid, &num_dests, + tvlv_len)) + goto err; + + if (!batadv_mcast_forw_push_adjust_padding(skb, count, num_dests, + tvlv_len)) + goto err; + + return 0; +err: + return -ENOMEM; +} + +/** + * batadv_mcast_forw_push_tracker() - push a multicast tracker TVLV header + * @skb: the skb to push the tracker TVLV onto + * @num_dests: the number of destination addresses to set in the header + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Pushes a multicast tracker TVLV header onto the given skb, including the + * generic TVLV header but excluding the destination MAC addresses. + * + * The provided num_dests value is taken into consideration to set the + * num_dests field in the tracker header and to set the appropriate TVLV length + * value fields. + * + * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on + * success 0. + */ +static int batadv_mcast_forw_push_tracker(struct sk_buff *skb, int num_dests, + unsigned short *tvlv_len) +{ + struct batadv_tvlv_mcast_tracker *mcast_tracker; + struct batadv_tvlv_hdr *tvlv_hdr; + unsigned int tvlv_value_len; + + if (skb_headroom(skb) < sizeof(*mcast_tracker) + sizeof(*tvlv_hdr)) + return -ENOMEM; + + tvlv_value_len = sizeof(*mcast_tracker) + *tvlv_len; + if (tvlv_value_len + sizeof(*tvlv_hdr) > U16_MAX) + return -ENOMEM; + + batadv_mcast_forw_skb_push(skb, sizeof(*mcast_tracker), tvlv_len); + mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb->data; + mcast_tracker->num_dests = htons(num_dests); + + skb_reset_network_header(skb); + + batadv_mcast_forw_skb_push(skb, sizeof(*tvlv_hdr), tvlv_len); + tvlv_hdr = (struct batadv_tvlv_hdr *)skb->data; + tvlv_hdr->type = BATADV_TVLV_MCAST_TRACKER; + tvlv_hdr->version = 1; + tvlv_hdr->len = htons(tvlv_value_len); + + return 0; +} + +/** + * batadv_mcast_forw_push_tvlvs() - push a multicast tracker TVLV onto an skb + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb to push the tracker TVLV onto + * @vid: the vlan identifier + * @is_routable: indicates whether the destination is routable + * @count: the number of originators the multicast packet needs to be sent to + * @tvlv_len: stores the amount of currently pushed TVLV bytes + * + * Pushes a multicast tracker TVLV onto the given skb, including the collected + * destination MAC addresses and the generic TVLV header. + * + * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on + * success 0. + */ +static int +batadv_mcast_forw_push_tvlvs(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, int is_routable, int count, + unsigned short *tvlv_len) +{ + int ret; + + ret = batadv_mcast_forw_push_dests(bat_priv, skb, vid, is_routable, + &count, tvlv_len); + if (ret < 0) + return ret; + + ret = batadv_mcast_forw_push_tracker(skb, count, tvlv_len); + if (ret < 0) + return ret; + + return 0; +} + +/** + * batadv_mcast_forw_push_hdr() - push a multicast packet header onto an skb + * @skb: the skb to push the header onto + * @tvlv_len: the total TVLV length value to set in the header + * + * Pushes a batman-adv multicast packet header onto the given skb and sets + * the provided total TVLV length value in it. + * + * Caller needs to ensure enough skb headroom is available. + * + * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on + * success 0. + */ +static int +batadv_mcast_forw_push_hdr(struct sk_buff *skb, unsigned short tvlv_len) +{ + struct batadv_mcast_packet *mcast_packet; + + if (skb_headroom(skb) < sizeof(*mcast_packet)) + return -ENOMEM; + + skb_push(skb, sizeof(*mcast_packet)); + + mcast_packet = (struct batadv_mcast_packet *)skb->data; + mcast_packet->version = BATADV_COMPAT_VERSION; + mcast_packet->ttl = BATADV_TTL; + mcast_packet->packet_type = BATADV_MCAST; + mcast_packet->reserved = 0; + mcast_packet->tvlv_len = htons(tvlv_len); + + return 0; +} + +/** + * batadv_mcast_forw_scrub_dests() - scrub destinations in a tracker TVLV + * @bat_priv: the bat priv with all the soft interface information + * @comp_neigh: next hop neighbor to scrub+collect destinations for + * @dest: start MAC entry in original skb's tracker TVLV + * @next_dest: start MAC entry in to be sent skb's tracker TVLV + * @num_dests: number of remaining destination MAC entries to iterate over + * + * This sorts destination entries into either the original batman-adv + * multicast packet or the skb (copy) that is going to be sent to comp_neigh + * next. + * + * In preparation for the next, to be (unicast) transmitted batman-adv multicast + * packet skb to be sent to the given neighbor node, tries to collect all + * originator MAC addresses that have the given neighbor node as their next hop + * in the to be transmitted skb (copy), which next_dest points into. That is we + * zero all destination entries in next_dest which do not have comp_neigh as + * their next hop. And zero all destination entries in the original skb that + * would have comp_neigh as their next hop (to avoid redundant transmissions and + * duplicated payload later). + */ +static void +batadv_mcast_forw_scrub_dests(struct batadv_priv *bat_priv, + struct batadv_neigh_node *comp_neigh, u8 *dest, + u8 *next_dest, u16 num_dests) +{ + struct batadv_neigh_node *next_neigh; + + /* skip first entry, this is what we are comparing with */ + eth_zero_addr(dest); + dest += ETH_ALEN; + next_dest += ETH_ALEN; + num_dests--; + + batadv_mcast_forw_tracker_for_each_dest2(dest, next_dest, num_dests) { + if (is_zero_ether_addr(next_dest)) + continue; + + /* sanity check, we expect unicast destinations */ + if (is_multicast_ether_addr(next_dest)) { + eth_zero_addr(dest); + eth_zero_addr(next_dest); + continue; + } + + next_neigh = batadv_orig_to_router(bat_priv, next_dest, NULL); + if (!next_neigh) { + eth_zero_addr(next_dest); + continue; + } + + if (!batadv_compare_eth(next_neigh->addr, comp_neigh->addr)) { + eth_zero_addr(next_dest); + batadv_neigh_node_put(next_neigh); + continue; + } + + /* found an entry for our next packet to transmit, so remove it + * from the original packet + */ + eth_zero_addr(dest); + batadv_neigh_node_put(next_neigh); + } +} + +/** + * batadv_mcast_forw_shrink_fill() - swap slot with next non-zero destination + * @slot: the to be filled zero-MAC destination entry in a tracker TVLV + * @num_dests_slot: remaining entries in tracker TVLV from/including slot + * + * Searches for the next non-zero-MAC destination entry in a tracker TVLV after + * the given slot pointer. And if found, swaps it with the zero-MAC destination + * entry which the slot points to. + * + * Return: true if slot was swapped/filled successfully, false otherwise. + */ +static bool batadv_mcast_forw_shrink_fill(u8 *slot, u16 num_dests_slot) +{ + u16 num_dests_filler; + u8 *filler; + + /* sanity check, should not happen */ + if (!num_dests_slot) + return false; + + num_dests_filler = num_dests_slot - 1; + filler = slot + ETH_ALEN; + + /* find a candidate to fill the empty slot */ + batadv_mcast_forw_tracker_for_each_dest(filler, num_dests_filler) { + if (is_zero_ether_addr(filler)) + continue; + + ether_addr_copy(slot, filler); + eth_zero_addr(filler); + return true; + } + + return false; +} + +/** + * batadv_mcast_forw_shrink_pack_dests() - pack destinations of a tracker TVLV + * @skb: the batman-adv multicast packet to compact destinations in + * + * Compacts the originator destination MAC addresses in the multicast tracker + * TVLV of the given multicast packet. This is done by moving all non-zero + * MAC addresses in direction of the skb head and all zero MAC addresses in skb + * tail direction, within the multicast tracker TVLV. + * + * Return: The number of consecutive zero MAC address destinations which are + * now at the end of the multicast tracker TVLV. + */ +static int batadv_mcast_forw_shrink_pack_dests(struct sk_buff *skb) +{ + struct batadv_tvlv_mcast_tracker *mcast_tracker; + unsigned char *skb_net_hdr; + u16 num_dests_slot; + u8 *slot; + + skb_net_hdr = skb_network_header(skb); + mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr; + num_dests_slot = ntohs(mcast_tracker->num_dests); + + slot = (u8 *)mcast_tracker + sizeof(*mcast_tracker); + + batadv_mcast_forw_tracker_for_each_dest(slot, num_dests_slot) { + /* find an empty slot */ + if (!is_zero_ether_addr(slot)) + continue; + + if (!batadv_mcast_forw_shrink_fill(slot, num_dests_slot)) + /* could not find a filler, so we successfully packed + * and can stop - and must not reduce num_dests_slot! + */ + break; + } + + /* num_dests_slot is now the amount of reduced, zeroed + * destinations at the end of the tracker TVLV + */ + return num_dests_slot; +} + +/** + * batadv_mcast_forw_shrink_align_offset() - get new alignment offset + * @num_dests_old: the old, to be updated amount of destination nodes + * @num_dests_reduce: the number of destinations that were removed + * + * Calculates the amount of potential extra alignment offset that is needed to + * adjust the TVLV padding after the change in destination nodes. + * + * Return: + * 0: If no change to padding is needed. + * 2: If padding needs to be removed. + * -2: If padding needs to be added. + */ +static short +batadv_mcast_forw_shrink_align_offset(unsigned int num_dests_old, + unsigned int num_dests_reduce) +{ + /* even amount of removed destinations -> no alignment change */ + if (!(num_dests_reduce % 2)) + return 0; + + /* even to odd amount of destinations -> remove padding */ + if (!(num_dests_old % 2)) + return 2; + + /* odd to even amount of destinations -> add padding */ + return -2; +} + +/** + * batadv_mcast_forw_shrink_update_headers() - update shrunk mc packet headers + * @skb: the batman-adv multicast packet to update headers of + * @num_dests_reduce: the number of destinations that were removed + * + * This updates any fields of a batman-adv multicast packet that are affected + * by the reduced number of destinations in the multicast tracket TVLV. In + * particular this updates: + * + * The num_dest field of the multicast tracker TVLV. + * The TVLV length field of the according generic TVLV header. + * The batman-adv multicast packet's total TVLV length field. + * + * Return: The offset in skb's tail direction at which the new batman-adv + * multicast packet header needs to start. + */ +static unsigned int +batadv_mcast_forw_shrink_update_headers(struct sk_buff *skb, + unsigned int num_dests_reduce) +{ + struct batadv_tvlv_mcast_tracker *mcast_tracker; + struct batadv_mcast_packet *mcast_packet; + struct batadv_tvlv_hdr *tvlv_hdr; + unsigned char *skb_net_hdr; + unsigned int offset; + short align_offset; + u16 num_dests; + + skb_net_hdr = skb_network_header(skb); + mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr; + num_dests = ntohs(mcast_tracker->num_dests); + + align_offset = batadv_mcast_forw_shrink_align_offset(num_dests, + num_dests_reduce); + offset = ETH_ALEN * num_dests_reduce + align_offset; + num_dests -= num_dests_reduce; + + /* update tracker header */ + mcast_tracker->num_dests = htons(num_dests); + + /* update tracker's tvlv header's length field */ + tvlv_hdr = (struct batadv_tvlv_hdr *)(skb_network_header(skb) - + sizeof(*tvlv_hdr)); + tvlv_hdr->len = htons(ntohs(tvlv_hdr->len) - offset); + + /* update multicast packet header's tvlv length field */ + mcast_packet = (struct batadv_mcast_packet *)skb->data; + mcast_packet->tvlv_len = htons(ntohs(mcast_packet->tvlv_len) - offset); + + return offset; +} + +/** + * batadv_mcast_forw_shrink_move_headers() - move multicast headers by offset + * @skb: the batman-adv multicast packet to move headers for + * @offset: a non-negative offset to move headers by, towards the skb tail + * + * Moves the batman-adv multicast packet header, its multicast tracker TVLV and + * any TVLVs in between by the given offset in direction towards the tail. + */ +static void +batadv_mcast_forw_shrink_move_headers(struct sk_buff *skb, unsigned int offset) +{ + struct batadv_tvlv_mcast_tracker *mcast_tracker; + unsigned char *skb_net_hdr; + unsigned int len; + u16 num_dests; + + skb_net_hdr = skb_network_header(skb); + mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr; + num_dests = ntohs(mcast_tracker->num_dests); + len = skb_network_offset(skb) + sizeof(*mcast_tracker); + len += num_dests * ETH_ALEN; + + batadv_mcast_forw_scrape(skb, len, offset); +} + +/** + * batadv_mcast_forw_shrink_tracker() - remove zero addresses in a tracker tvlv + * @skb: the batman-adv multicast packet to (potentially) shrink + * + * Removes all destinations with a zero MAC addresses (00:00:00:00:00:00) from + * the given batman-adv multicast packet's tracker TVLV and updates headers + * accordingly to maintain a valid batman-adv multicast packet. + */ +static void batadv_mcast_forw_shrink_tracker(struct sk_buff *skb) +{ + unsigned int offset; + u16 dests_reduced; + + dests_reduced = batadv_mcast_forw_shrink_pack_dests(skb); + if (!dests_reduced) + return; + + offset = batadv_mcast_forw_shrink_update_headers(skb, dests_reduced); + batadv_mcast_forw_shrink_move_headers(skb, offset); +} + +/** + * batadv_mcast_forw_packet() - forward a batman-adv multicast packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the received or locally generated batman-adv multicast packet + * @local_xmit: indicates that the packet was locally generated and not received + * + * Parses the tracker TVLV of a batman-adv multicast packet and forwards the + * packet as indicated in this TVLV. + * + * Caller needs to set the skb network header to the start of the multicast + * tracker TVLV (excluding the generic TVLV header) and the skb transport header + * to the next byte after this multicast tracker TVLV. + * + * Caller needs to free the skb. + * + * Return: NET_RX_SUCCESS or NET_RX_DROP on success or a negative error + * code on failure. NET_RX_SUCCESS if the received packet is supposed to be + * decapsulated and forwarded to the own soft interface, NET_RX_DROP otherwise. + */ +static int batadv_mcast_forw_packet(struct batadv_priv *bat_priv, + struct sk_buff *skb, bool local_xmit) +{ + struct batadv_tvlv_mcast_tracker *mcast_tracker; + struct batadv_neigh_node *neigh_node; + unsigned long offset, num_dests_off; + struct sk_buff *nexthop_skb; + unsigned char *skb_net_hdr; + bool local_recv = false; + unsigned int tvlv_len; + bool xmitted = false; + u8 *dest, *next_dest; + u16 num_dests; + int ret; + + /* (at least) TVLV part needs to be linearized */ + SKB_LINEAR_ASSERT(skb); + + /* check if num_dests is within skb length */ + num_dests_off = offsetof(struct batadv_tvlv_mcast_tracker, num_dests); + if (num_dests_off > skb_network_header_len(skb)) + return -EINVAL; + + skb_net_hdr = skb_network_header(skb); + mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr; + num_dests = ntohs(mcast_tracker->num_dests); + + dest = (u8 *)mcast_tracker + sizeof(*mcast_tracker); + + /* check if full tracker tvlv is within skb length */ + tvlv_len = sizeof(*mcast_tracker) + ETH_ALEN * num_dests; + if (tvlv_len > skb_network_header_len(skb)) + return -EINVAL; + + /* invalidate checksum: */ + skb->ip_summed = CHECKSUM_NONE; + + batadv_mcast_forw_tracker_for_each_dest(dest, num_dests) { + if (is_zero_ether_addr(dest)) + continue; + + /* only unicast originator addresses supported */ + if (is_multicast_ether_addr(dest)) { + eth_zero_addr(dest); + continue; + } + + if (batadv_is_my_mac(bat_priv, dest)) { + eth_zero_addr(dest); + local_recv = true; + continue; + } + + neigh_node = batadv_orig_to_router(bat_priv, dest, NULL); + if (!neigh_node) { + eth_zero_addr(dest); + continue; + } + + nexthop_skb = skb_copy(skb, GFP_ATOMIC); + if (!nexthop_skb) { + batadv_neigh_node_put(neigh_node); + return -ENOMEM; + } + + offset = dest - skb->data; + next_dest = nexthop_skb->data + offset; + + batadv_mcast_forw_scrub_dests(bat_priv, neigh_node, dest, + next_dest, num_dests); + batadv_mcast_forw_shrink_tracker(nexthop_skb); + + batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_TX); + batadv_add_counter(bat_priv, BATADV_CNT_MCAST_TX_BYTES, + nexthop_skb->len + ETH_HLEN); + xmitted = true; + ret = batadv_send_unicast_skb(nexthop_skb, neigh_node); + + batadv_neigh_node_put(neigh_node); + + if (ret < 0) + return ret; + } + + if (xmitted) { + if (local_xmit) { + batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_TX_LOCAL); + batadv_add_counter(bat_priv, + BATADV_CNT_MCAST_TX_LOCAL_BYTES, + skb->len - + skb_transport_offset(skb)); + } else { + batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_FWD); + batadv_add_counter(bat_priv, BATADV_CNT_MCAST_FWD_BYTES, + skb->len + ETH_HLEN); + } + } + + if (local_recv) + return NET_RX_SUCCESS; + else + return NET_RX_DROP; +} + +/** + * batadv_mcast_forw_tracker_tvlv_handler() - handle an mcast tracker tvlv + * @bat_priv: the bat priv with all the soft interface information + * @skb: the received batman-adv multicast packet + * + * Parses the tracker TVLV of an incoming batman-adv multicast packet and + * forwards the packet as indicated in this TVLV. + * + * Caller needs to set the skb network header to the start of the multicast + * tracker TVLV (excluding the generic TVLV header) and the skb transport header + * to the next byte after this multicast tracker TVLV. + * + * Caller needs to free the skb. + * + * Return: NET_RX_SUCCESS or NET_RX_DROP on success or a negative error + * code on failure. NET_RX_SUCCESS if the received packet is supposed to be + * decapsulated and forwarded to the own soft interface, NET_RX_DROP otherwise. + */ +int batadv_mcast_forw_tracker_tvlv_handler(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return batadv_mcast_forw_packet(bat_priv, skb, false); +} + +/** + * batadv_mcast_forw_packet_hdrlen() - multicast packet header length + * @num_dests: number of destination nodes + * + * Calculates the total batman-adv multicast packet header length for a given + * number of destination nodes (excluding the outer ethernet frame). + * + * Return: The calculated total batman-adv multicast packet header length. + */ +unsigned int batadv_mcast_forw_packet_hdrlen(unsigned int num_dests) +{ + /** + * If the number of destination entries is even then we need to add + * two byte padding to the tracker TVLV. + */ + int padding = (!(num_dests % 2)) ? 2 : 0; + + return padding + num_dests * ETH_ALEN + + sizeof(struct batadv_tvlv_mcast_tracker) + + sizeof(struct batadv_tvlv_hdr) + + sizeof(struct batadv_mcast_packet); +} + +/** + * batadv_mcast_forw_expand_head() - expand headroom for an mcast packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to send + * + * Tries to expand an skb's headroom so that its head to tail is 1298 + * bytes (minimum IPv6 MTU + vlan ethernet header size) large. + * + * Return: -EINVAL if the given skb's length is too large or -ENOMEM on memory + * allocation failure. Otherwise, on success, zero is returned. + */ +static int batadv_mcast_forw_expand_head(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + int hdr_size = VLAN_ETH_HLEN + IPV6_MIN_MTU - skb->len; + + /* TODO: Could be tightened to actual number of destination nodes? + * But it's tricky, number of destinations might have increased since + * we last checked. + */ + if (hdr_size < 0) { + /* batadv_mcast_forw_mode_check_count() should ensure we do not + * end up here + */ + WARN_ON(1); + return -EINVAL; + } + + if (skb_headroom(skb) < hdr_size && + pskb_expand_head(skb, hdr_size, 0, GFP_ATOMIC) < 0) + return -ENOMEM; + + return 0; +} + +/** + * batadv_mcast_forw_push() - encapsulate skb in a batman-adv multicast packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to encapsulate and send + * @vid: the vlan identifier + * @is_routable: indicates whether the destination is routable + * @count: the number of originators the multicast packet needs to be sent to + * + * Encapsulates the given multicast packet in a batman-adv multicast packet. + * A multicast tracker TVLV with destination originator addresses for any node + * that signaled interest in it, that is either via the translation table or the + * according want-all flags, is attached accordingly. + * + * Return: true on success, false otherwise. + */ +bool batadv_mcast_forw_push(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, int is_routable, int count) +{ + unsigned short tvlv_len = 0; + int ret; + + if (batadv_mcast_forw_expand_head(bat_priv, skb) < 0) + goto err; + + skb_reset_transport_header(skb); + + ret = batadv_mcast_forw_push_tvlvs(bat_priv, skb, vid, is_routable, + count, &tvlv_len); + if (ret < 0) + goto err; + + ret = batadv_mcast_forw_push_hdr(skb, tvlv_len); + if (ret < 0) + goto err; + + return true; + +err: + if (tvlv_len) + skb_pull(skb, tvlv_len); + + return false; +} + +/** + * batadv_mcast_forw_mcsend() - send a self prepared batman-adv multicast packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the multicast packet to encapsulate and send + * + * Transmits a batman-adv multicast packet that was locally prepared and + * consumes/frees it. + * + * Return: NET_XMIT_DROP on memory allocation failure. NET_XMIT_SUCCESS + * otherwise. + */ +int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + int ret = batadv_mcast_forw_packet(bat_priv, skb, true); + + if (ret < 0) { + kfree_skb(skb); + return NET_XMIT_DROP; + } + + consume_skb(skb); + return NET_XMIT_SUCCESS; +} diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 0c64d81a7761..1f7ed9d4f6fd 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -7,6 +7,7 @@ #include "netlink.h" #include "main.h" +#include <linux/array_size.h> #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/bug.h> @@ -20,7 +21,6 @@ #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/init.h> -#include <linux/kernel.h> #include <linux/limits.h> #include <linux/list.h> #include <linux/minmax.h> diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 34903df4fe93..71c143d4b6d0 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -312,6 +312,33 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node, } /** + * batadv_orig_to_router() - get next hop neighbor to an orig address + * @bat_priv: the bat priv with all the soft interface information + * @orig_addr: the originator MAC address to search the best next hop router for + * @if_outgoing: the interface where the payload packet has been received or + * the OGM should be sent to + * + * Return: A neighbor node which is the best router towards the given originator + * address. + */ +struct batadv_neigh_node * +batadv_orig_to_router(struct batadv_priv *bat_priv, u8 *orig_addr, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_neigh_node *neigh_node; + struct batadv_orig_node *orig_node; + + orig_node = batadv_orig_hash_find(bat_priv, orig_addr); + if (!orig_node) + return NULL; + + neigh_node = batadv_find_router(bat_priv, orig_node, if_outgoing); + batadv_orig_node_put(orig_node); + + return neigh_node; +} + +/** * batadv_orig_ifinfo_get() - find the ifinfo from an orig_node * @orig_node: the orig node to be queried * @if_outgoing: the interface for which the ifinfo should be acquired @@ -942,6 +969,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, #ifdef CONFIG_BATMAN_ADV_MCAST orig_node->mcast_flags = BATADV_MCAST_WANT_NO_RTR4; orig_node->mcast_flags |= BATADV_MCAST_WANT_NO_RTR6; + orig_node->mcast_flags |= BATADV_MCAST_HAVE_MC_PTYPE_CAPA; INIT_HLIST_NODE(&orig_node->mcast_want_all_unsnoopables_node); INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv4_node); INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv6_node); diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index ea3d69e4e670..db0c55128170 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -36,6 +36,9 @@ void batadv_neigh_node_release(struct kref *ref); struct batadv_neigh_node * batadv_orig_router_get(struct batadv_orig_node *orig_node, const struct batadv_hard_iface *if_outgoing); +struct batadv_neigh_node * +batadv_orig_to_router(struct batadv_priv *bat_priv, u8 *orig_addr, + struct batadv_hard_iface *if_outgoing); struct batadv_neigh_ifinfo * batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh, struct batadv_hard_iface *if_outgoing); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 163cd43c4821..f1061985149f 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1270,3 +1270,73 @@ out: batadv_orig_node_put(orig_node); return ret; } + +#ifdef CONFIG_BATMAN_ADV_MCAST +/** + * batadv_recv_mcast_packet() - process received batman-adv multicast packet + * @skb: the received batman-adv multicast packet + * @recv_if: interface that the skb is received on + * + * Parses the given, received batman-adv multicast packet. Depending on the + * contents of its TVLV forwards it and/or decapsulates it to hand it to the + * soft interface. + * + * Return: NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise. + */ +int batadv_recv_mcast_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if) +{ + struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_mcast_packet *mcast_packet; + int hdr_size = sizeof(*mcast_packet); + unsigned char *tvlv_buff; + int ret = NET_RX_DROP; + u16 tvlv_buff_len; + + if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0) + goto free_skb; + + /* create a copy of the skb, if needed, to modify it. */ + if (skb_cow(skb, ETH_HLEN) < 0) + goto free_skb; + + /* packet needs to be linearized to access the tvlv content */ + if (skb_linearize(skb) < 0) + goto free_skb; + + mcast_packet = (struct batadv_mcast_packet *)skb->data; + if (mcast_packet->ttl-- < 2) + goto free_skb; + + tvlv_buff = (unsigned char *)(skb->data + hdr_size); + tvlv_buff_len = ntohs(mcast_packet->tvlv_len); + + if (tvlv_buff_len > skb->len - hdr_size) + goto free_skb; + + ret = batadv_tvlv_containers_process(bat_priv, BATADV_MCAST, NULL, skb, + tvlv_buff, tvlv_buff_len); + if (ret >= 0) { + batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_RX); + batadv_add_counter(bat_priv, BATADV_CNT_MCAST_RX_BYTES, + skb->len + ETH_HLEN); + } + + hdr_size += tvlv_buff_len; + + if (ret == NET_RX_SUCCESS && (skb->len - hdr_size >= ETH_HLEN)) { + batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_RX_LOCAL); + batadv_add_counter(bat_priv, BATADV_CNT_MCAST_RX_LOCAL_BYTES, + skb->len - hdr_size); + + batadv_interface_rx(bat_priv->soft_iface, skb, hdr_size, NULL); + /* skb was consumed */ + skb = NULL; + } + +free_skb: + kfree_skb(skb); + + return ret; +} +#endif /* CONFIG_BATMAN_ADV_MCAST */ diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index afd15b3879f1..e9849f032a24 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -27,6 +27,17 @@ int batadv_recv_frag_packet(struct sk_buff *skb, struct batadv_hard_iface *iface); int batadv_recv_bcast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); +#ifdef CONFIG_BATMAN_ADV_MCAST +int batadv_recv_mcast_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if); +#else +static inline int batadv_recv_mcast_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if) +{ + kfree_skb(skb); + return NET_RX_DROP; +} +#endif int batadv_recv_unicast_tvlv(struct sk_buff *skb, struct batadv_hard_iface *recv_if); int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb, diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 1bf1232a4f75..89c51b3cf430 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -301,12 +301,13 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, send: if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) { - forw_mode = batadv_mcast_forw_mode(bat_priv, skb, + forw_mode = batadv_mcast_forw_mode(bat_priv, skb, vid, &mcast_is_routable); switch (forw_mode) { case BATADV_FORW_BCAST: break; case BATADV_FORW_UCASTS: + case BATADV_FORW_MCAST: do_bcast = false; break; case BATADV_FORW_NONE: @@ -365,6 +366,8 @@ send: } else if (forw_mode == BATADV_FORW_UCASTS) { ret = batadv_mcast_forw_send(bat_priv, skb, vid, mcast_is_routable); + } else if (forw_mode == BATADV_FORW_MCAST) { + ret = batadv_mcast_forw_mcsend(bat_priv, skb); } else { if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) @@ -762,6 +765,7 @@ static int batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->mcast.num_want_all_unsnoopables, 0); atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0); atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0); + atomic_set(&bat_priv->mcast.num_no_mc_ptype_capa, 0); #endif atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF); atomic_set(&bat_priv->gw.bandwidth_down, 100); @@ -925,6 +929,18 @@ static const struct { { "tt_response_rx" }, { "tt_roam_adv_tx" }, { "tt_roam_adv_rx" }, +#ifdef CONFIG_BATMAN_ADV_MCAST + { "mcast_tx" }, + { "mcast_tx_bytes" }, + { "mcast_tx_local" }, + { "mcast_tx_local_bytes" }, + { "mcast_rx" }, + { "mcast_rx_bytes" }, + { "mcast_rx_local" }, + { "mcast_rx_local_bytes" }, + { "mcast_fwd" }, + { "mcast_fwd_bytes" }, +#endif #ifdef CONFIG_BATMAN_ADV_DAT { "dat_get_tx" }, { "dat_get_rx" }, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 17d5ea1d8e84..00840d5784fe 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -862,6 +862,70 @@ enum batadv_counters { */ BATADV_CNT_TT_ROAM_ADV_RX, +#ifdef CONFIG_BATMAN_ADV_MCAST + /** + * @BATADV_CNT_MCAST_TX: transmitted batman-adv multicast packets + * counter + */ + BATADV_CNT_MCAST_TX, + + /** + * @BATADV_CNT_MCAST_TX_BYTES: transmitted batman-adv multicast packets + * bytes counter + */ + BATADV_CNT_MCAST_TX_BYTES, + + /** + * @BATADV_CNT_MCAST_TX_LOCAL: counter for multicast packets which + * were locally encapsulated and transmitted as batman-adv multicast + * packets + */ + BATADV_CNT_MCAST_TX_LOCAL, + + /** + * @BATADV_CNT_MCAST_TX_LOCAL_BYTES: bytes counter for multicast packets + * which were locally encapsulated and transmitted as batman-adv + * multicast packets + */ + BATADV_CNT_MCAST_TX_LOCAL_BYTES, + + /** + * @BATADV_CNT_MCAST_RX: received batman-adv multicast packet counter + */ + BATADV_CNT_MCAST_RX, + + /** + * @BATADV_CNT_MCAST_RX_BYTES: received batman-adv multicast packet + * bytes counter + */ + BATADV_CNT_MCAST_RX_BYTES, + + /** + * @BATADV_CNT_MCAST_RX_LOCAL: counter for received batman-adv multicast + * packets which were forwarded to the local soft interface + */ + BATADV_CNT_MCAST_RX_LOCAL, + + /** + * @BATADV_CNT_MCAST_RX_LOCAL_BYTES: bytes counter for received + * batman-adv multicast packets which were forwarded to the local soft + * interface + */ + BATADV_CNT_MCAST_RX_LOCAL_BYTES, + + /** + * @BATADV_CNT_MCAST_FWD: counter for received batman-adv multicast + * packets which were forwarded to other, neighboring nodes + */ + BATADV_CNT_MCAST_FWD, + + /** + * @BATADV_CNT_MCAST_FWD_BYTES: bytes counter for received batman-adv + * multicast packets which were forwarded to other, neighboring nodes + */ + BATADV_CNT_MCAST_FWD_BYTES, +#endif + #ifdef CONFIG_BATMAN_ADV_DAT /** * @BATADV_CNT_DAT_GET_TX: transmitted dht GET traffic packet counter @@ -1279,6 +1343,12 @@ struct batadv_priv_mcast { atomic_t num_want_all_rtr6; /** + * @num_no_mc_ptype_capa: counter for number of nodes without the + * BATADV_MCAST_HAVE_MC_PTYPE_CAPA flag + */ + atomic_t num_no_mc_ptype_capa; + + /** * @want_lists_lock: lock for protecting modifications to mcasts * want_all_{unsnoopables,ipv4,ipv6}_list (traversals are rcu-locked) */ diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index 5918d1b32e19..8906f7bdf4a9 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -12,6 +12,11 @@ extern struct bpf_struct_ops bpf_bpf_dummy_ops; /* A common type for test_N with return value in bpf_dummy_ops */ typedef int (*dummy_ops_test_ret_fn)(struct bpf_dummy_ops_state *state, ...); +static int dummy_ops_test_ret_function(struct bpf_dummy_ops_state *state, ...) +{ + return 0; +} + struct bpf_dummy_ops_test_args { u64 args[MAX_BPF_FUNC_ARGS]; struct bpf_dummy_ops_state state; @@ -62,7 +67,7 @@ static int dummy_ops_copy_args(struct bpf_dummy_ops_test_args *args) static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args) { - dummy_ops_test_ret_fn test = (void *)image; + dummy_ops_test_ret_fn test = (void *)image + cfi_get_offset(); struct bpf_dummy_ops_state *state = NULL; /* state needs to be NULL if args[0] is 0 */ @@ -101,12 +106,11 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, goto out; } - image = bpf_jit_alloc_exec(PAGE_SIZE); + image = arch_alloc_bpf_trampoline(PAGE_SIZE); if (!image) { err = -ENOMEM; goto out; } - set_vm_flush_reset_perms(image); link = kzalloc(sizeof(*link), GFP_USER); if (!link) { @@ -120,11 +124,12 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, op_idx = prog->expected_attach_type; err = bpf_struct_ops_prepare_trampoline(tlinks, link, &st_ops->func_models[op_idx], + &dummy_ops_test_ret_function, image, image + PAGE_SIZE); if (err < 0) goto out; - set_memory_rox((long)image, 1); + arch_protect_bpf_trampoline(image, PAGE_SIZE); prog_ret = dummy_ops_call_op(image, args); err = dummy_ops_copy_args(args); @@ -134,7 +139,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, err = -EFAULT; out: kfree(args); - bpf_jit_free_exec(image); + arch_free_bpf_trampoline(image, PAGE_SIZE); if (link) bpf_link_put(&link->link); kfree(tlinks); @@ -220,6 +225,28 @@ static void bpf_dummy_unreg(void *kdata) { } +static int bpf_dummy_test_1(struct bpf_dummy_ops_state *cb) +{ + return 0; +} + +static int bpf_dummy_test_2(struct bpf_dummy_ops_state *cb, int a1, unsigned short a2, + char a3, unsigned long a4) +{ + return 0; +} + +static int bpf_dummy_test_sleepable(struct bpf_dummy_ops_state *cb) +{ + return 0; +} + +static struct bpf_dummy_ops __bpf_bpf_dummy_ops = { + .test_1 = bpf_dummy_test_1, + .test_2 = bpf_dummy_test_2, + .test_sleepable = bpf_dummy_test_sleepable, +}; + struct bpf_struct_ops bpf_bpf_dummy_ops = { .verifier_ops = &bpf_dummy_verifier_ops, .init = bpf_dummy_init, @@ -228,4 +255,5 @@ struct bpf_struct_ops bpf_bpf_dummy_ops = { .reg = bpf_dummy_reg, .unreg = bpf_dummy_unreg, .name = "bpf_dummy_ops", + .cfi_stubs = &__bpf_bpf_dummy_ops, }; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index c9fdcc5cdce1..dfd919374017 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -542,7 +542,7 @@ struct bpf_fentry_test_t { int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg) { - asm volatile (""); + asm volatile ("": "+r"(arg)); return (long)arg; } @@ -600,10 +600,21 @@ __bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) refcount_dec(&p->cnt); } +__bpf_kfunc void bpf_kfunc_call_test_release_dtor(void *p) +{ + bpf_kfunc_call_test_release(p); +} +CFI_NOSEAL(bpf_kfunc_call_test_release_dtor); + __bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p) { } +__bpf_kfunc void bpf_kfunc_call_memb_release_dtor(void *p) +{ +} +CFI_NOSEAL(bpf_kfunc_call_memb_release_dtor); + __bpf_kfunc_end_defs(); BTF_SET8_START(bpf_test_modify_return_ids) @@ -1671,9 +1682,9 @@ static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = { BTF_ID_LIST(bpf_prog_test_dtor_kfunc_ids) BTF_ID(struct, prog_test_ref_kfunc) -BTF_ID(func, bpf_kfunc_call_test_release) +BTF_ID(func, bpf_kfunc_call_test_release_dtor) BTF_ID(struct, prog_test_member) -BTF_ID(func, bpf_kfunc_call_memb_release) +BTF_ID(func, bpf_kfunc_call_memb_release_dtor) static int __init bpf_prog_test_run_init(void) { diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 8f40de3af154..65cee0ad3c1b 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -471,6 +471,7 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_get = br_fdb_get, .ndo_mdb_add = br_mdb_add, .ndo_mdb_del = br_mdb_del, + .ndo_mdb_del_bulk = br_mdb_del_bulk, .ndo_mdb_dump = br_mdb_dump, .ndo_mdb_get = br_mdb_get, .ndo_bridge_getlink = br_getlink, diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 8cc526067bc2..bc37e47ad829 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -1412,6 +1412,139 @@ int br_mdb_del(struct net_device *dev, struct nlattr *tb[], return err; } +struct br_mdb_flush_desc { + u32 port_ifindex; + u16 vid; + u8 rt_protocol; + u8 state; + u8 state_mask; +}; + +static const struct nla_policy br_mdbe_attrs_del_bulk_pol[MDBE_ATTR_MAX + 1] = { + [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC), + [MDBE_ATTR_STATE_MASK] = NLA_POLICY_MASK(NLA_U8, MDB_PERMANENT), +}; + +static int br_mdb_flush_desc_init(struct br_mdb_flush_desc *desc, + struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]); + struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1]; + int err; + + desc->port_ifindex = entry->ifindex; + desc->vid = entry->vid; + desc->state = entry->state; + + if (!tb[MDBA_SET_ENTRY_ATTRS]) + return 0; + + err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX, + tb[MDBA_SET_ENTRY_ATTRS], + br_mdbe_attrs_del_bulk_pol, extack); + if (err) + return err; + + if (mdbe_attrs[MDBE_ATTR_STATE_MASK]) + desc->state_mask = nla_get_u8(mdbe_attrs[MDBE_ATTR_STATE_MASK]); + + if (mdbe_attrs[MDBE_ATTR_RTPROT]) + desc->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]); + + return 0; +} + +static void br_mdb_flush_host(struct net_bridge *br, + struct net_bridge_mdb_entry *mp, + const struct br_mdb_flush_desc *desc) +{ + u8 state; + + if (desc->port_ifindex && desc->port_ifindex != br->dev->ifindex) + return; + + if (desc->rt_protocol) + return; + + state = br_group_is_l2(&mp->addr) ? MDB_PERMANENT : 0; + if (desc->state_mask && (state & desc->state_mask) != desc->state) + return; + + br_multicast_host_leave(mp, true); + if (!mp->ports && netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); +} + +static void br_mdb_flush_pgs(struct net_bridge *br, + struct net_bridge_mdb_entry *mp, + const struct br_mdb_flush_desc *desc) +{ + struct net_bridge_port_group __rcu **pp; + struct net_bridge_port_group *p; + + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;) { + u8 state; + + if (desc->port_ifindex && + desc->port_ifindex != p->key.port->dev->ifindex) { + pp = &p->next; + continue; + } + + if (desc->rt_protocol && desc->rt_protocol != p->rt_protocol) { + pp = &p->next; + continue; + } + + state = p->flags & MDB_PG_FLAGS_PERMANENT ? MDB_PERMANENT : 0; + if (desc->state_mask && + (state & desc->state_mask) != desc->state) { + pp = &p->next; + continue; + } + + br_multicast_del_pg(mp, p, pp); + } +} + +static void br_mdb_flush(struct net_bridge *br, + const struct br_mdb_flush_desc *desc) +{ + struct net_bridge_mdb_entry *mp; + + spin_lock_bh(&br->multicast_lock); + + /* Safe variant is not needed because entries are removed from the list + * upon group timer expiration or bridge deletion. + */ + hlist_for_each_entry(mp, &br->mdb_list, mdb_node) { + if (desc->vid && desc->vid != mp->addr.vid) + continue; + + br_mdb_flush_host(br, mp, desc); + br_mdb_flush_pgs(br, mp, desc); + } + + spin_unlock_bh(&br->multicast_lock); +} + +int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + struct net_bridge *br = netdev_priv(dev); + struct br_mdb_flush_desc desc = {}; + int err; + + err = br_mdb_flush_desc_init(&desc, tb, extack); + if (err) + return err; + + br_mdb_flush(br, &desc); + + return 0; +} + static const struct nla_policy br_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = { [MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY, sizeof(struct in_addr), diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 6b7f36769d03..b0a92c344722 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -186,6 +186,7 @@ enum { * struct net_bridge_vlan - per-vlan entry * * @vnode: rhashtable member + * @tnode: rhashtable member * @vid: VLAN id * @flags: bridge vlan flags * @priv_flags: private (in-kernel) bridge vlan flags @@ -196,6 +197,7 @@ enum { * @refcnt: if MASTER flag set, this is bumped for each port referencing it * @brvlan: if MASTER flag unset, this points to the global per-VLAN context * for this VLAN entry + * @tinfo: bridge tunnel info * @br_mcast_ctx: if MASTER flag set, this is the global vlan multicast context * @port_mcast_ctx: if MASTER flag unset, this is the per-port/vlan multicast * context @@ -1020,6 +1022,8 @@ int br_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, struct netlink_ext_ack *extack); int br_mdb_del(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack); +int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack); int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb); int br_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq, @@ -1428,6 +1432,12 @@ static inline int br_mdb_del(struct net_device *dev, struct nlattr *tb[], return -EOPNOTSUPP; } +static inline int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + static inline int br_mdb_dump(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb) { diff --git a/net/core/Makefile b/net/core/Makefile index 0cb734cbc24b..821aec06abf1 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -18,7 +18,7 @@ obj-y += dev.o dev_addr_lists.o dst.o netevent.o \ obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o obj-y += net-sysfs.o -obj-$(CONFIG_PAGE_POOL) += page_pool.o +obj-$(CONFIG_PAGE_POOL) += page_pool.o page_pool_user.o obj-$(CONFIG_PROC_FS) += net-procfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NETPOLL) += netpoll.o diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index cca7594be92e..6c4d90b24d46 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -275,9 +275,10 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk) static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap, void *owner, u32 size) { - int optmem_max = READ_ONCE(sysctl_optmem_max); struct sock *sk = (struct sock *)owner; + int optmem_max; + optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max); /* same check as in sock_kmalloc() */ if (size <= optmem_max && atomic_read(&sk->sk_omem_alloc) + size < optmem_max) { diff --git a/net/core/dev.c b/net/core/dev.c index ad20bebe153f..f9d4b550ef4b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -165,7 +165,6 @@ static int netif_rx_internal(struct sk_buff *skb); static int call_netdevice_notifiers_extack(unsigned long val, struct net_device *dev, struct netlink_ext_ack *extack); -static struct napi_struct *napi_by_id(unsigned int napi_id); /* * The @dev_base_head list is protected by @dev_base_lock and the rtnl @@ -3757,6 +3756,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); + tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_DROP); + if (q->flags & TCQ_F_NOLOCK) { if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) && qdisc_run_begin(q)) { @@ -3786,7 +3787,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, no_lock_out: if (unlikely(to_free)) kfree_skb_list_reason(to_free, - SKB_DROP_REASON_QDISC_DROP); + tcf_get_drop_reason(to_free)); return rc; } @@ -3841,7 +3842,8 @@ no_lock_out: } spin_unlock(root_lock); if (unlikely(to_free)) - kfree_skb_list_reason(to_free, SKB_DROP_REASON_QDISC_DROP); + kfree_skb_list_reason(to_free, + tcf_get_drop_reason(to_free)); if (unlikely(contended)) spin_unlock(&q->busylock); return rc; @@ -3927,14 +3929,14 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb, tc_skb_cb(skb)->mru = 0; tc_skb_cb(skb)->post_ct = false; - res.drop_reason = *drop_reason; + tcf_set_drop_reason(skb, *drop_reason); mini_qdisc_bstats_cpu_update(miniq, skb); ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false); /* Only tcf related quirks below. */ switch (ret) { case TC_ACT_SHOT: - *drop_reason = res.drop_reason; + *drop_reason = tcf_get_drop_reason(skb); mini_qdisc_qstats_cpu_drop(miniq); break; case TC_ACT_OK: @@ -6142,7 +6144,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done) EXPORT_SYMBOL(napi_complete_done); /* must be called under rcu_read_lock(), as we dont take a reference */ -static struct napi_struct *napi_by_id(unsigned int napi_id) +struct napi_struct *napi_by_id(unsigned int napi_id) { unsigned int hash = napi_id % HASH_SIZE(napi_hash); struct napi_struct *napi; @@ -6403,6 +6405,43 @@ int dev_set_threaded(struct net_device *dev, bool threaded) } EXPORT_SYMBOL(dev_set_threaded); +/** + * netif_queue_set_napi - Associate queue with the napi + * @dev: device to which NAPI and queue belong + * @queue_index: Index of queue + * @type: queue type as RX or TX + * @napi: NAPI context, pass NULL to clear previously set NAPI + * + * Set queue with its corresponding napi context. This should be done after + * registering the NAPI handler for the queue-vector and the queues have been + * mapped to the corresponding interrupt vector. + */ +void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, + enum netdev_queue_type type, struct napi_struct *napi) +{ + struct netdev_rx_queue *rxq; + struct netdev_queue *txq; + + if (WARN_ON_ONCE(napi && !napi->dev)) + return; + if (dev->reg_state >= NETREG_REGISTERED) + ASSERT_RTNL(); + + switch (type) { + case NETDEV_QUEUE_TYPE_RX: + rxq = __netif_get_rx_queue(dev, queue_index); + rxq->napi = napi; + return; + case NETDEV_QUEUE_TYPE_TX: + txq = netdev_get_tx_queue(dev, queue_index); + txq->napi = napi; + return; + default: + return; + } +} +EXPORT_SYMBOL(netif_queue_set_napi); + void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { @@ -6438,6 +6477,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, */ if (dev->threaded && napi_kthread_create(napi)) dev->threaded = 0; + netif_napi_set_irq(napi, -1); } EXPORT_SYMBOL(netif_napi_add_weight); @@ -10514,7 +10554,7 @@ void netdev_run_todo(void) write_lock(&dev_base_lock); dev->reg_state = NETREG_UNREGISTERED; write_unlock(&dev_base_lock); - linkwatch_forget_dev(dev); + linkwatch_sync_dev(dev); } while (!list_empty(&list)) { @@ -11239,17 +11279,19 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, dev_net_set(dev, net); dev->ifindex = new_ifindex; - /* Send a netdev-add uevent to the new namespace */ - kobject_uevent(&dev->dev.kobj, KOBJ_ADD); - netdev_adjacent_add_links(dev); - if (new_name[0]) /* Rename the netdev to prepared name */ strscpy(dev->name, new_name, IFNAMSIZ); /* Fixup kobjects */ + dev_set_uevent_suppress(&dev->dev, 1); err = device_rename(&dev->dev, dev->name); + dev_set_uevent_suppress(&dev->dev, 0); WARN_ON(err); + /* Send a netdev-add uevent to the new namespace */ + kobject_uevent(&dev->dev.kobj, KOBJ_ADD); + netdev_adjacent_add_links(dev); + /* Adapt owner in case owning user namespace of target network * namespace is different from the original one. */ @@ -11573,6 +11615,60 @@ static struct pernet_operations __net_initdata default_device_ops = { .exit_batch = default_device_exit_batch, }; +static void __init net_dev_struct_check(void) +{ + /* TX read-mostly hotpath */ + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, netdev_ops); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, header_ops); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, _tx); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, real_num_tx_queues); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_size); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_ipv4_max_size); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_segs); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, num_tc); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, mtu); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, needed_headroom); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tc_to_txq); +#ifdef CONFIG_XPS + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, xps_maps); +#endif +#ifdef CONFIG_NETFILTER_EGRESS + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, nf_hooks_egress); +#endif +#ifdef CONFIG_NET_XGRESS + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tcx_egress); +#endif + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 152); + + /* TXRX read-mostly hotpath */ + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr); + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 30); + + /* RX read-mostly hotpath */ + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_defer_hard_irqs); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler_data); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, nd_net); +#ifdef CONFIG_NETPOLL + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, npinfo); +#endif +#ifdef CONFIG_NET_XGRESS + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress); +#endif + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 96); +} + /* * Initialize the DEV module. At boot time this walks the device list and * unhooks any devices that fail to initialise (normally hardware not @@ -11590,6 +11686,8 @@ static int __init net_dev_init(void) BUG_ON(!dev_boot_phase); + net_dev_struct_check(); + if (dev_proc_init()) goto out; diff --git a/net/core/dev.h b/net/core/dev.h index 5aa45f0fd4ae..cf93e188785b 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -30,7 +30,6 @@ int __init dev_proc_init(void); #endif void linkwatch_init_dev(struct net_device *dev); -void linkwatch_forget_dev(struct net_device *dev); void linkwatch_run_queue(void); void dev_addr_flush(struct net_device *dev); @@ -145,4 +144,6 @@ void xdp_do_check_flushed(struct napi_struct *napi); #else static inline void xdp_do_check_flushed(struct napi_struct *napi) { } #endif + +struct napi_struct *napi_by_id(unsigned int napi_id); #endif diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index feeddf95f450..9a66cf5015f2 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -322,9 +322,9 @@ static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr) * frames and not forward them), it must set IFF_SEE_ALL_HWTSTAMP_REQUESTS in * dev->priv_flags. */ -static int dev_set_hwtstamp_phylib(struct net_device *dev, - struct kernel_hwtstamp_config *cfg, - struct netlink_ext_ack *extack) +int dev_set_hwtstamp_phylib(struct net_device *dev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; bool phy_ts = phy_has_hwtstamp(dev->phydev); @@ -363,6 +363,7 @@ static int dev_set_hwtstamp_phylib(struct net_device *dev, return 0; } +EXPORT_SYMBOL_GPL(dev_set_hwtstamp_phylib); static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr) { diff --git a/net/core/filter.c b/net/core/filter.c index 1737884be52f..24061f29c9dd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -203,7 +203,7 @@ BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) return 0; nla = (struct nlattr *) &skb->data[a]; - if (nla->nla_len > skb->len - a) + if (!nla_ok(nla, skb->len - a)) return 0; nla = nla_find_nested(nla, x); @@ -1219,8 +1219,8 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) */ static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp) { + int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max); u32 filter_size = bpf_prog_size(fp->prog->len); - int optmem_max = READ_ONCE(sysctl_optmem_max); /* same check as in sock_kmalloc() */ if (filter_size <= optmem_max && @@ -1550,12 +1550,13 @@ EXPORT_SYMBOL_GPL(sk_attach_filter); int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) { struct bpf_prog *prog = __get_filter(fprog, sk); - int err; + int err, optmem_max; if (IS_ERR(prog)) return PTR_ERR(prog); - if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) + optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max); + if (bpf_prog_size(prog->len) > optmem_max) err = -ENOMEM; else err = reuseport_attach_prog(sk, prog); @@ -1594,7 +1595,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk) int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) { struct bpf_prog *prog; - int err; + int err, optmem_max; if (sock_flag(sk, SOCK_FILTER_LOCKED)) return -EPERM; @@ -1622,7 +1623,8 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) } } else { /* BPF_PROG_TYPE_SOCKET_FILTER */ - if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) { + optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max); + if (bpf_prog_size(prog->len) > optmem_max) { err = -ENOMEM; goto err_prog_put; } @@ -7257,7 +7259,6 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len struct tcphdr *, th, u32, th_len) { #ifdef CONFIG_SYN_COOKIES - u32 cookie; int ret; if (unlikely(!sk || th_len < sizeof(*th))) @@ -7279,8 +7280,6 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (tcp_synq_no_recent_overflow(sk)) return -ENOENT; - cookie = ntohl(th->ack_seq) - 1; - /* Both struct iphdr and struct ipv6hdr have the version field at the * same offset so we can cast to the shorter header (struct iphdr). */ @@ -7289,7 +7288,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk)) return -EINVAL; - ret = __cookie_v4_check((struct iphdr *)iph, th, cookie); + ret = __cookie_v4_check((struct iphdr *)iph, th); break; #if IS_BUILTIN(CONFIG_IPV6) @@ -7300,7 +7299,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (sk->sk_family != AF_INET6) return -EINVAL; - ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie); + ret = __cookie_v6_check((struct ipv6hdr *)iph, th); break; #endif /* CONFIG_IPV6 */ @@ -7753,9 +7752,7 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = { BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph, struct tcphdr *, th) { - u32 cookie = ntohl(th->ack_seq) - 1; - - if (__cookie_v4_check(iph, th, cookie) > 0) + if (__cookie_v4_check(iph, th) > 0) return 0; return -EACCES; @@ -7776,9 +7773,7 @@ BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph, struct tcphdr *, th) { #if IS_BUILTIN(CONFIG_IPV6) - u32 cookie = ntohl(th->ack_seq) - 1; - - if (__cookie_v6_check(iph, th, cookie) > 0) + if (__cookie_v6_check(iph, th) > 0) return 0; return -EACCES; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index c469d1c4db5d..429571c258da 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -192,7 +192,10 @@ static void __linkwatch_run_queue(int urgent_only) #define MAX_DO_DEV_PER_LOOP 100 int do_dev = MAX_DO_DEV_PER_LOOP; - struct net_device *dev; + /* Use a local list here since we add non-urgent + * events back to the global one when called with + * urgent_only=1. + */ LIST_HEAD(wrk); /* Give urgent case more budget */ @@ -218,6 +221,7 @@ static void __linkwatch_run_queue(int urgent_only) list_splice_init(&lweventlist, &wrk); while (!list_empty(&wrk) && do_dev > 0) { + struct net_device *dev; dev = list_first_entry(&wrk, struct net_device, link_watch_list); list_del_init(&dev->link_watch_list); @@ -245,7 +249,7 @@ static void __linkwatch_run_queue(int urgent_only) spin_unlock_irq(&lweventlist_lock); } -void linkwatch_forget_dev(struct net_device *dev) +void linkwatch_sync_dev(struct net_device *dev) { unsigned long flags; int clean = 0; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index fccaa5bac0ed..a09d507c5b03 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -193,11 +193,22 @@ static ssize_t carrier_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); + int ret = -EINVAL; - if (netif_running(netdev)) - return sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev)); + if (!rtnl_trylock()) + return restart_syscall(); - return -EINVAL; + if (netif_running(netdev)) { + /* Synchronize carrier state with link watch, + * see also rtnl_getlink(). + */ + linkwatch_sync_dev(netdev); + + ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev)); + } + rtnl_unlock(); + + return ret; } static DEVICE_ATTR_RW(carrier); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index f4183c4c1ec8..72799533426b 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -372,6 +372,10 @@ out_undo: static int __net_init net_defaults_init_net(struct net *net) { net->core.sysctl_somaxconn = SOMAXCONN; + /* Limits per socket sk_omem_alloc usage. + * TCP zerocopy regular usage needs 128 KB. + */ + net->core.sysctl_optmem_max = 128 * 1024; net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED; return 0; @@ -1099,11 +1103,56 @@ out: rtnl_set_sk_err(net, RTNLGRP_NSID, err); } +#ifdef CONFIG_NET_NS +static void __init netns_ipv4_struct_check(void) +{ + /* TX readonly hotpath cache lines */ + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, + sysctl_tcp_early_retrans); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, + sysctl_tcp_tso_win_divisor); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, + sysctl_tcp_tso_rtt_log); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, + sysctl_tcp_autocorking); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, + sysctl_tcp_min_snd_mss); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, + sysctl_tcp_notsent_lowat); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, + sysctl_tcp_limit_output_bytes); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, + sysctl_tcp_min_rtt_wlen); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, + sysctl_tcp_wmem); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx, + sysctl_ip_fwd_use_pmtu); + CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_tx, 33); + + /* TXRX readonly hotpath cache lines */ + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_txrx, + sysctl_tcp_moderate_rcvbuf); + CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_txrx, 1); + + /* RX readonly hotpath cache line */ + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, + sysctl_ip_early_demux); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, + sysctl_tcp_early_demux); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, + sysctl_tcp_reordering); + CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx, + sysctl_tcp_rmem); + CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 18); +} +#endif + void __init net_ns_init(void) { struct net_generic *ng; #ifdef CONFIG_NET_NS + netns_ipv4_struct_check(); net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), SMP_CACHE_BYTES, SLAB_PANIC|SLAB_ACCOUNT, NULL); diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index ea9231378aa6..be7f2ebd61b2 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -10,11 +10,64 @@ #include <uapi/linux/netdev.h> +/* Integer value ranges */ +static const struct netlink_range_validation netdev_a_page_pool_id_range = { + .min = 1ULL, + .max = 4294967295ULL, +}; + +static const struct netlink_range_validation netdev_a_page_pool_ifindex_range = { + .min = 1ULL, + .max = 2147483647ULL, +}; + +/* Common nested types */ +const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1] = { + [NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range), + [NETDEV_A_PAGE_POOL_IFINDEX] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_page_pool_ifindex_range), +}; + /* NETDEV_CMD_DEV_GET - do */ static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1] = { [NETDEV_A_DEV_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), }; +/* NETDEV_CMD_PAGE_POOL_GET - do */ +#ifdef CONFIG_PAGE_POOL +static const struct nla_policy netdev_page_pool_get_nl_policy[NETDEV_A_PAGE_POOL_ID + 1] = { + [NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range), +}; +#endif /* CONFIG_PAGE_POOL */ + +/* NETDEV_CMD_PAGE_POOL_STATS_GET - do */ +#ifdef CONFIG_PAGE_POOL_STATS +static const struct nla_policy netdev_page_pool_stats_get_nl_policy[NETDEV_A_PAGE_POOL_STATS_INFO + 1] = { + [NETDEV_A_PAGE_POOL_STATS_INFO] = NLA_POLICY_NESTED(netdev_page_pool_info_nl_policy), +}; +#endif /* CONFIG_PAGE_POOL_STATS */ + +/* NETDEV_CMD_QUEUE_GET - do */ +static const struct nla_policy netdev_queue_get_do_nl_policy[NETDEV_A_QUEUE_TYPE + 1] = { + [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), + [NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1), + [NETDEV_A_QUEUE_ID] = { .type = NLA_U32, }, +}; + +/* NETDEV_CMD_QUEUE_GET - dump */ +static const struct nla_policy netdev_queue_get_dump_nl_policy[NETDEV_A_QUEUE_IFINDEX + 1] = { + [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), +}; + +/* NETDEV_CMD_NAPI_GET - do */ +static const struct nla_policy netdev_napi_get_do_nl_policy[NETDEV_A_NAPI_ID + 1] = { + [NETDEV_A_NAPI_ID] = { .type = NLA_U32, }, +}; + +/* NETDEV_CMD_NAPI_GET - dump */ +static const struct nla_policy netdev_napi_get_dump_nl_policy[NETDEV_A_NAPI_IFINDEX + 1] = { + [NETDEV_A_NAPI_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -29,10 +82,67 @@ static const struct genl_split_ops netdev_nl_ops[] = { .dumpit = netdev_nl_dev_get_dumpit, .flags = GENL_CMD_CAP_DUMP, }, +#ifdef CONFIG_PAGE_POOL + { + .cmd = NETDEV_CMD_PAGE_POOL_GET, + .doit = netdev_nl_page_pool_get_doit, + .policy = netdev_page_pool_get_nl_policy, + .maxattr = NETDEV_A_PAGE_POOL_ID, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NETDEV_CMD_PAGE_POOL_GET, + .dumpit = netdev_nl_page_pool_get_dumpit, + .flags = GENL_CMD_CAP_DUMP, + }, +#endif /* CONFIG_PAGE_POOL */ +#ifdef CONFIG_PAGE_POOL_STATS + { + .cmd = NETDEV_CMD_PAGE_POOL_STATS_GET, + .doit = netdev_nl_page_pool_stats_get_doit, + .policy = netdev_page_pool_stats_get_nl_policy, + .maxattr = NETDEV_A_PAGE_POOL_STATS_INFO, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NETDEV_CMD_PAGE_POOL_STATS_GET, + .dumpit = netdev_nl_page_pool_stats_get_dumpit, + .flags = GENL_CMD_CAP_DUMP, + }, +#endif /* CONFIG_PAGE_POOL_STATS */ + { + .cmd = NETDEV_CMD_QUEUE_GET, + .doit = netdev_nl_queue_get_doit, + .policy = netdev_queue_get_do_nl_policy, + .maxattr = NETDEV_A_QUEUE_TYPE, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NETDEV_CMD_QUEUE_GET, + .dumpit = netdev_nl_queue_get_dumpit, + .policy = netdev_queue_get_dump_nl_policy, + .maxattr = NETDEV_A_QUEUE_IFINDEX, + .flags = GENL_CMD_CAP_DUMP, + }, + { + .cmd = NETDEV_CMD_NAPI_GET, + .doit = netdev_nl_napi_get_doit, + .policy = netdev_napi_get_do_nl_policy, + .maxattr = NETDEV_A_NAPI_ID, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NETDEV_CMD_NAPI_GET, + .dumpit = netdev_nl_napi_get_dumpit, + .policy = netdev_napi_get_dump_nl_policy, + .maxattr = NETDEV_A_NAPI_IFINDEX, + .flags = GENL_CMD_CAP_DUMP, + }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { [NETDEV_NLGRP_MGMT] = { "mgmt", }, + [NETDEV_NLGRP_PAGE_POOL] = { "page-pool", }, }; struct genl_family netdev_nl_family __ro_after_init = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index 7b370c073e7d..a47f2bcbe4fa 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -11,11 +11,27 @@ #include <uapi/linux/netdev.h> +/* Common nested types */ +extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1]; + int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int netdev_nl_page_pool_get_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_page_pool_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); +int netdev_nl_page_pool_stats_get_doit(struct sk_buff *skb, + struct genl_info *info); +int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); +int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_queue_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); +int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); enum { NETDEV_NLGRP_MGMT, + NETDEV_NLGRP_PAGE_POOL, }; extern struct genl_family netdev_nl_family; diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index fe61f85bcf33..fd98936da3ae 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -6,13 +6,32 @@ #include <net/net_namespace.h> #include <net/sock.h> #include <net/xdp.h> +#include <net/xdp_sock.h> +#include <net/netdev_rx_queue.h> +#include <net/busy_poll.h> #include "netdev-genl-gen.h" +#include "dev.h" + +struct netdev_nl_dump_ctx { + unsigned long ifindex; + unsigned int rxq_idx; + unsigned int txq_idx; + unsigned int napi_id; +}; + +static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb) +{ + NL_ASSERT_DUMP_CTX_FITS(struct netdev_nl_dump_ctx); + + return (struct netdev_nl_dump_ctx *)cb->ctx; +} static int netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info) { + u64 xsk_features = 0; u64 xdp_rx_meta = 0; void *hdr; @@ -26,11 +45,20 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC + if (netdev->xsk_tx_metadata_ops) { + if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp) + xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP; + if (netdev->xsk_tx_metadata_ops->tmo_request_checksum) + xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM; + } + if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES, netdev->xdp_features, NETDEV_A_DEV_PAD) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, - xdp_rx_meta, NETDEV_A_DEV_PAD)) { + xdp_rx_meta, NETDEV_A_DEV_PAD) || + nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES, + xsk_features, NETDEV_A_DEV_PAD)) { genlmsg_cancel(rsp, hdr); return -EINVAL; } @@ -111,12 +139,13 @@ err_free_msg: int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { + struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); struct net *net = sock_net(skb->sk); struct net_device *netdev; int err = 0; rtnl_lock(); - for_each_netdev_dump(net, netdev, cb->args[0]) { + for_each_netdev_dump(net, netdev, ctx->ifindex) { err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb)); if (err < 0) break; @@ -129,6 +158,317 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +static int +netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi, + const struct genl_info *info) +{ + void *hdr; + pid_t pid; + + if (WARN_ON_ONCE(!napi->dev)) + return -EINVAL; + if (!(napi->dev->flags & IFF_UP)) + return 0; + + hdr = genlmsg_iput(rsp, info); + if (!hdr) + return -EMSGSIZE; + + if (napi->napi_id >= MIN_NAPI_ID && + nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id)) + goto nla_put_failure; + + if (nla_put_u32(rsp, NETDEV_A_NAPI_IFINDEX, napi->dev->ifindex)) + goto nla_put_failure; + + if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq)) + goto nla_put_failure; + + if (napi->thread) { + pid = task_pid_nr(napi->thread); + if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid)) + goto nla_put_failure; + } + + genlmsg_end(rsp, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(rsp, hdr); + return -EMSGSIZE; +} + +int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct napi_struct *napi; + struct sk_buff *rsp; + u32 napi_id; + int err; + + if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID)) + return -EINVAL; + + napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]); + + rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!rsp) + return -ENOMEM; + + rtnl_lock(); + + napi = napi_by_id(napi_id); + if (napi) + err = netdev_nl_napi_fill_one(rsp, napi, info); + else + err = -EINVAL; + + rtnl_unlock(); + + if (err) + goto err_free_msg; + + return genlmsg_reply(rsp, info); + +err_free_msg: + nlmsg_free(rsp); + return err; +} + +static int +netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp, + const struct genl_info *info, + struct netdev_nl_dump_ctx *ctx) +{ + struct napi_struct *napi; + int err = 0; + + if (!(netdev->flags & IFF_UP)) + return err; + + list_for_each_entry(napi, &netdev->napi_list, dev_list) { + if (ctx->napi_id && napi->napi_id >= ctx->napi_id) + continue; + + err = netdev_nl_napi_fill_one(rsp, napi, info); + if (err) + return err; + ctx->napi_id = napi->napi_id; + } + return err; +} + +int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); + const struct genl_info *info = genl_info_dump(cb); + struct net *net = sock_net(skb->sk); + struct net_device *netdev; + u32 ifindex = 0; + int err = 0; + + if (info->attrs[NETDEV_A_NAPI_IFINDEX]) + ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]); + + rtnl_lock(); + if (ifindex) { + netdev = __dev_get_by_index(net, ifindex); + if (netdev) + err = netdev_nl_napi_dump_one(netdev, skb, info, ctx); + else + err = -ENODEV; + } else { + for_each_netdev_dump(net, netdev, ctx->ifindex) { + err = netdev_nl_napi_dump_one(netdev, skb, info, ctx); + if (err < 0) + break; + ctx->napi_id = 0; + } + } + rtnl_unlock(); + + if (err != -EMSGSIZE) + return err; + + return skb->len; +} + +static int +netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, + u32 q_idx, u32 q_type, const struct genl_info *info) +{ + struct netdev_rx_queue *rxq; + struct netdev_queue *txq; + void *hdr; + + hdr = genlmsg_iput(rsp, info); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx) || + nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type) || + nla_put_u32(rsp, NETDEV_A_QUEUE_IFINDEX, netdev->ifindex)) + goto nla_put_failure; + + switch (q_type) { + case NETDEV_QUEUE_TYPE_RX: + rxq = __netif_get_rx_queue(netdev, q_idx); + if (rxq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID, + rxq->napi->napi_id)) + goto nla_put_failure; + break; + case NETDEV_QUEUE_TYPE_TX: + txq = netdev_get_tx_queue(netdev, q_idx); + if (txq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID, + txq->napi->napi_id)) + goto nla_put_failure; + } + + genlmsg_end(rsp, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(rsp, hdr); + return -EMSGSIZE; +} + +static int netdev_nl_queue_validate(struct net_device *netdev, u32 q_id, + u32 q_type) +{ + switch (q_type) { + case NETDEV_QUEUE_TYPE_RX: + if (q_id >= netdev->real_num_rx_queues) + return -EINVAL; + return 0; + case NETDEV_QUEUE_TYPE_TX: + if (q_id >= netdev->real_num_tx_queues) + return -EINVAL; + } + return 0; +} + +static int +netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, + u32 q_type, const struct genl_info *info) +{ + int err = 0; + + if (!(netdev->flags & IFF_UP)) + return err; + + err = netdev_nl_queue_validate(netdev, q_idx, q_type); + if (err) + return err; + + return netdev_nl_queue_fill_one(rsp, netdev, q_idx, q_type, info); +} + +int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + u32 q_id, q_type, ifindex; + struct net_device *netdev; + struct sk_buff *rsp; + int err; + + if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_ID) || + GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) || + GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX)) + return -EINVAL; + + q_id = nla_get_u32(info->attrs[NETDEV_A_QUEUE_ID]); + q_type = nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]); + ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); + + rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!rsp) + return -ENOMEM; + + rtnl_lock(); + + netdev = __dev_get_by_index(genl_info_net(info), ifindex); + if (netdev) + err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info); + else + err = -ENODEV; + + rtnl_unlock(); + + if (err) + goto err_free_msg; + + return genlmsg_reply(rsp, info); + +err_free_msg: + nlmsg_free(rsp); + return err; +} + +static int +netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp, + const struct genl_info *info, + struct netdev_nl_dump_ctx *ctx) +{ + int err = 0; + int i; + + if (!(netdev->flags & IFF_UP)) + return err; + + for (i = ctx->rxq_idx; i < netdev->real_num_rx_queues;) { + err = netdev_nl_queue_fill_one(rsp, netdev, i, + NETDEV_QUEUE_TYPE_RX, info); + if (err) + return err; + ctx->rxq_idx = i++; + } + for (i = ctx->txq_idx; i < netdev->real_num_tx_queues;) { + err = netdev_nl_queue_fill_one(rsp, netdev, i, + NETDEV_QUEUE_TYPE_TX, info); + if (err) + return err; + ctx->txq_idx = i++; + } + + return err; +} + +int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb); + const struct genl_info *info = genl_info_dump(cb); + struct net *net = sock_net(skb->sk); + struct net_device *netdev; + u32 ifindex = 0; + int err = 0; + + if (info->attrs[NETDEV_A_QUEUE_IFINDEX]) + ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); + + rtnl_lock(); + if (ifindex) { + netdev = __dev_get_by_index(net, ifindex); + if (netdev) + err = netdev_nl_queue_dump_one(netdev, skb, info, ctx); + else + err = -ENODEV; + } else { + for_each_netdev_dump(net, netdev, ctx->ifindex) { + err = netdev_nl_queue_dump_one(netdev, skb, info, ctx); + if (err < 0) + break; + ctx->rxq_idx = 0; + ctx->txq_idx = 0; + } + } + rtnl_unlock(); + + if (err != -EMSGSIZE) + return err; + + return skb->len; +} + static int netdev_genl_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { diff --git a/net/core/page_pool.c b/net/core/page_pool.c index dec544337236..4933762e5a6b 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -23,10 +23,12 @@ #include <trace/events/page_pool.h> +#include "page_pool_priv.h" + #define DEFER_TIME (msecs_to_jiffies(1000)) #define DEFER_WARN_INTERVAL (60 * HZ) -#define BIAS_MAX LONG_MAX +#define BIAS_MAX (LONG_MAX >> 1) #ifdef CONFIG_PAGE_POOL_STATS /* alloc_stat_inc is intended to be used in softirq context */ @@ -69,7 +71,7 @@ static const char pp_stats[][ETH_GSTRING_LEN] = { * is passed to this API which is filled in. The caller can then report * those stats to the user (perhaps via ethtool, debugfs, etc.). */ -bool page_pool_get_stats(struct page_pool *pool, +bool page_pool_get_stats(const struct page_pool *pool, struct page_pool_stats *stats) { int cpu = 0; @@ -173,7 +175,8 @@ static int page_pool_init(struct page_pool *pool, { unsigned int ring_qsize = 1024; /* Default */ - memcpy(&pool->p, params, sizeof(pool->p)); + memcpy(&pool->p, ¶ms->fast, sizeof(pool->p)); + memcpy(&pool->slow, ¶ms->slow, sizeof(pool->slow)); /* Validate only known flags were used */ if (pool->p.flags & ~(PP_FLAG_ALL)) @@ -211,6 +214,8 @@ static int page_pool_init(struct page_pool *pool, */ } + pool->has_init_callback = !!pool->slow.init_callback; + #ifdef CONFIG_PAGE_POOL_STATS pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats); if (!pool->recycle_stats) @@ -235,6 +240,18 @@ static int page_pool_init(struct page_pool *pool, return 0; } +static void page_pool_uninit(struct page_pool *pool) +{ + ptr_ring_cleanup(&pool->ring, NULL); + + if (pool->p.flags & PP_FLAG_DMA_MAP) + put_device(pool->p.dev); + +#ifdef CONFIG_PAGE_POOL_STATS + free_percpu(pool->recycle_stats); +#endif +} + /** * page_pool_create() - create a page pool. * @params: parameters, see struct page_pool_params @@ -249,13 +266,21 @@ struct page_pool *page_pool_create(const struct page_pool_params *params) return ERR_PTR(-ENOMEM); err = page_pool_init(pool, params); - if (err < 0) { - pr_warn("%s() gave up with errno %d\n", __func__, err); - kfree(pool); - return ERR_PTR(err); - } + if (err < 0) + goto err_free; + + err = page_pool_list(pool); + if (err) + goto err_uninit; return pool; + +err_uninit: + page_pool_uninit(pool); +err_free: + pr_warn("%s() gave up with errno %d\n", __func__, err); + kfree(pool); + return ERR_PTR(err); } EXPORT_SYMBOL(page_pool_create); @@ -388,8 +413,8 @@ static void page_pool_set_pp_info(struct page_pool *pool, * the overhead is negligible. */ page_pool_fragment_page(page, 1); - if (pool->p.init_callback) - pool->p.init_callback(page, pool->p.init_arg); + if (pool->has_init_callback) + pool->slow.init_callback(page, pool->slow.init_arg); } static void page_pool_clear_pp_info(struct page *page) @@ -504,7 +529,7 @@ EXPORT_SYMBOL(page_pool_alloc_pages); */ #define _distance(a, b) (s32)((a) - (b)) -static s32 page_pool_inflight(struct page_pool *pool) +s32 page_pool_inflight(const struct page_pool *pool, bool strict) { u32 release_cnt = atomic_read(&pool->pages_state_release_cnt); u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt); @@ -512,27 +537,27 @@ static s32 page_pool_inflight(struct page_pool *pool) inflight = _distance(hold_cnt, release_cnt); - trace_page_pool_release(pool, inflight, hold_cnt, release_cnt); - WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight); + if (strict) { + trace_page_pool_release(pool, inflight, hold_cnt, release_cnt); + WARN(inflight < 0, "Negative(%d) inflight packet-pages", + inflight); + } else { + inflight = max(0, inflight); + } return inflight; } -/* Disconnects a page (from a page_pool). API users can have a need - * to disconnect a page (from a page_pool), to allow it to be used as - * a regular page (that will eventually be returned to the normal - * page-allocator via put_page). - */ -static void page_pool_return_page(struct page_pool *pool, struct page *page) +static __always_inline +void __page_pool_release_page_dma(struct page_pool *pool, struct page *page) { dma_addr_t dma; - int count; if (!(pool->p.flags & PP_FLAG_DMA_MAP)) /* Always account for inflight pages, even if we didn't * map them */ - goto skip_dma_unmap; + return; dma = page_pool_get_dma_addr(page); @@ -541,7 +566,19 @@ static void page_pool_return_page(struct page_pool *pool, struct page *page) PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); page_pool_set_dma_addr(page, 0); -skip_dma_unmap: +} + +/* Disconnects a page (from a page_pool). API users can have a need + * to disconnect a page (from a page_pool), to allow it to be used as + * a regular page (that will eventually be returned to the normal + * page-allocator via put_page). + */ +void page_pool_return_page(struct page_pool *pool, struct page *page) +{ + int count; + + __page_pool_release_page_dma(pool, page); + page_pool_clear_pp_info(page); /* This may be the last page returned, releasing the pool, so @@ -647,8 +684,8 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, return NULL; } -void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, - unsigned int dma_sync_size, bool allow_direct) +void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, bool allow_direct) { page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct); if (page && !page_pool_recycle_in_ring(pool, page)) { @@ -657,7 +694,7 @@ void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, page_pool_return_page(pool, page); } } -EXPORT_SYMBOL(page_pool_put_defragged_page); +EXPORT_SYMBOL(page_pool_put_unrefed_page); /** * page_pool_put_page_bulk() - release references on multiple pages @@ -684,7 +721,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, struct page *page = virt_to_head_page(data[i]); /* It is not the last user for the page frag case */ - if (!page_pool_is_last_frag(page)) + if (!page_pool_is_last_ref(page)) continue; page = __page_pool_put_page(pool, page, -1, false); @@ -726,7 +763,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool, long drain_count = BIAS_MAX - pool->frag_users; /* Some user is still using the page frag */ - if (likely(page_pool_defrag_page(page, drain_count))) + if (likely(page_pool_unref_page(page, drain_count))) return NULL; if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) { @@ -747,7 +784,7 @@ static void page_pool_free_frag(struct page_pool *pool) pool->frag_page = NULL; - if (!page || page_pool_defrag_page(page, drain_count)) + if (!page || page_pool_unref_page(page, drain_count)) return; page_pool_return_page(pool, page); @@ -818,14 +855,8 @@ static void __page_pool_destroy(struct page_pool *pool) if (pool->disconnect) pool->disconnect(pool); - ptr_ring_cleanup(&pool->ring, NULL); - - if (pool->p.flags & PP_FLAG_DMA_MAP) - put_device(pool->p.dev); - -#ifdef CONFIG_PAGE_POOL_STATS - free_percpu(pool->recycle_stats); -#endif + page_pool_unlist(pool); + page_pool_uninit(pool); kfree(pool); } @@ -862,7 +893,7 @@ static int page_pool_release(struct page_pool *pool) int inflight; page_pool_scrub(pool); - inflight = page_pool_inflight(pool); + inflight = page_pool_inflight(pool, true); if (!inflight) __page_pool_destroy(pool); @@ -873,18 +904,21 @@ static void page_pool_release_retry(struct work_struct *wq) { struct delayed_work *dwq = to_delayed_work(wq); struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw); + void *netdev; int inflight; inflight = page_pool_release(pool); if (!inflight) return; - /* Periodic warning */ - if (time_after_eq(jiffies, pool->defer_warn)) { + /* Periodic warning for page pools the user can't see */ + netdev = READ_ONCE(pool->slow.netdev); + if (time_after_eq(jiffies, pool->defer_warn) && + (!netdev || netdev == NET_PTR_POISON)) { int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ; - pr_warn("%s() stalled pool shutdown %d inflight %d sec\n", - __func__, inflight, sec); + pr_warn("%s() stalled pool shutdown: id %u, %d inflight %d sec\n", + __func__, pool->user.id, inflight, sec); pool->defer_warn = jiffies + DEFER_WARN_INTERVAL; } @@ -929,6 +963,7 @@ void page_pool_destroy(struct page_pool *pool) if (!page_pool_release(pool)) return; + page_pool_detached(pool); pool->defer_start = jiffies; pool->defer_warn = jiffies + DEFER_WARN_INTERVAL; diff --git a/net/core/page_pool_priv.h b/net/core/page_pool_priv.h new file mode 100644 index 000000000000..90665d40f1eb --- /dev/null +++ b/net/core/page_pool_priv.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __PAGE_POOL_PRIV_H +#define __PAGE_POOL_PRIV_H + +s32 page_pool_inflight(const struct page_pool *pool, bool strict); + +int page_pool_list(struct page_pool *pool); +void page_pool_detached(struct page_pool *pool); +void page_pool_unlist(struct page_pool *pool); + +#endif diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c new file mode 100644 index 000000000000..ffe5244e5597 --- /dev/null +++ b/net/core/page_pool_user.c @@ -0,0 +1,410 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/mutex.h> +#include <linux/netdevice.h> +#include <linux/xarray.h> +#include <net/net_debug.h> +#include <net/page_pool/types.h> +#include <net/page_pool/helpers.h> +#include <net/sock.h> + +#include "page_pool_priv.h" +#include "netdev-genl-gen.h" + +static DEFINE_XARRAY_FLAGS(page_pools, XA_FLAGS_ALLOC1); +/* Protects: page_pools, netdevice->page_pools, pool->slow.netdev, pool->user. + * Ordering: inside rtnl_lock + */ +static DEFINE_MUTEX(page_pools_lock); + +/* Page pools are only reachable from user space (via netlink) if they are + * linked to a netdev at creation time. Following page pool "visibility" + * states are possible: + * - normal + * - user.list: linked to real netdev, netdev: real netdev + * - orphaned - real netdev has disappeared + * - user.list: linked to lo, netdev: lo + * - invisible - either (a) created without netdev linking, (b) unlisted due + * to error, or (c) the entire namespace which owned this pool disappeared + * - user.list: unhashed, netdev: unknown + */ + +typedef int (*pp_nl_fill_cb)(struct sk_buff *rsp, const struct page_pool *pool, + const struct genl_info *info); + +static int +netdev_nl_page_pool_get_do(struct genl_info *info, u32 id, pp_nl_fill_cb fill) +{ + struct page_pool *pool; + struct sk_buff *rsp; + int err; + + mutex_lock(&page_pools_lock); + pool = xa_load(&page_pools, id); + if (!pool || hlist_unhashed(&pool->user.list) || + !net_eq(dev_net(pool->slow.netdev), genl_info_net(info))) { + err = -ENOENT; + goto err_unlock; + } + + rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!rsp) { + err = -ENOMEM; + goto err_unlock; + } + + err = fill(rsp, pool, info); + if (err) + goto err_free_msg; + + mutex_unlock(&page_pools_lock); + + return genlmsg_reply(rsp, info); + +err_free_msg: + nlmsg_free(rsp); +err_unlock: + mutex_unlock(&page_pools_lock); + return err; +} + +struct page_pool_dump_cb { + unsigned long ifindex; + u32 pp_id; +}; + +static int +netdev_nl_page_pool_get_dump(struct sk_buff *skb, struct netlink_callback *cb, + pp_nl_fill_cb fill) +{ + struct page_pool_dump_cb *state = (void *)cb->ctx; + const struct genl_info *info = genl_info_dump(cb); + struct net *net = sock_net(skb->sk); + struct net_device *netdev; + struct page_pool *pool; + int err = 0; + + rtnl_lock(); + mutex_lock(&page_pools_lock); + for_each_netdev_dump(net, netdev, state->ifindex) { + hlist_for_each_entry(pool, &netdev->page_pools, user.list) { + if (state->pp_id && state->pp_id < pool->user.id) + continue; + + state->pp_id = pool->user.id; + err = fill(skb, pool, info); + if (err) + break; + } + + state->pp_id = 0; + } + mutex_unlock(&page_pools_lock); + rtnl_unlock(); + + if (skb->len && err == -EMSGSIZE) + return skb->len; + return err; +} + +static int +page_pool_nl_stats_fill(struct sk_buff *rsp, const struct page_pool *pool, + const struct genl_info *info) +{ +#ifdef CONFIG_PAGE_POOL_STATS + struct page_pool_stats stats = {}; + struct nlattr *nest; + void *hdr; + + if (!page_pool_get_stats(pool, &stats)) + return 0; + + hdr = genlmsg_iput(rsp, info); + if (!hdr) + return -EMSGSIZE; + + nest = nla_nest_start(rsp, NETDEV_A_PAGE_POOL_STATS_INFO); + + if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_ID, pool->user.id) || + (pool->slow.netdev->ifindex != LOOPBACK_IFINDEX && + nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX, + pool->slow.netdev->ifindex))) + goto err_cancel_nest; + + nla_nest_end(rsp, nest); + + if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST, + stats.alloc_stats.fast) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW, + stats.alloc_stats.slow) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER, + stats.alloc_stats.slow_high_order) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY, + stats.alloc_stats.empty) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL, + stats.alloc_stats.refill) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE, + stats.alloc_stats.waive) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED, + stats.recycle_stats.cached) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL, + stats.recycle_stats.cache_full) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING, + stats.recycle_stats.ring) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL, + stats.recycle_stats.ring_full) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT, + stats.recycle_stats.released_refcnt)) + goto err_cancel_msg; + + genlmsg_end(rsp, hdr); + + return 0; +err_cancel_nest: + nla_nest_cancel(rsp, nest); +err_cancel_msg: + genlmsg_cancel(rsp, hdr); + return -EMSGSIZE; +#else + GENL_SET_ERR_MSG(info, "kernel built without CONFIG_PAGE_POOL_STATS"); + return -EOPNOTSUPP; +#endif +} + +int netdev_nl_page_pool_stats_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct nlattr *tb[ARRAY_SIZE(netdev_page_pool_info_nl_policy)]; + struct nlattr *nest; + int err; + u32 id; + + if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_PAGE_POOL_STATS_INFO)) + return -EINVAL; + + nest = info->attrs[NETDEV_A_PAGE_POOL_STATS_INFO]; + err = nla_parse_nested(tb, ARRAY_SIZE(tb) - 1, nest, + netdev_page_pool_info_nl_policy, + info->extack); + if (err) + return err; + + if (NL_REQ_ATTR_CHECK(info->extack, nest, tb, NETDEV_A_PAGE_POOL_ID)) + return -EINVAL; + if (tb[NETDEV_A_PAGE_POOL_IFINDEX]) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[NETDEV_A_PAGE_POOL_IFINDEX], + "selecting by ifindex not supported"); + return -EINVAL; + } + + id = nla_get_uint(tb[NETDEV_A_PAGE_POOL_ID]); + + return netdev_nl_page_pool_get_do(info, id, page_pool_nl_stats_fill); +} + +int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return netdev_nl_page_pool_get_dump(skb, cb, page_pool_nl_stats_fill); +} + +static int +page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, + const struct genl_info *info) +{ + size_t inflight, refsz; + void *hdr; + + hdr = genlmsg_iput(rsp, info); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_ID, pool->user.id)) + goto err_cancel; + + if (pool->slow.netdev->ifindex != LOOPBACK_IFINDEX && + nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX, + pool->slow.netdev->ifindex)) + goto err_cancel; + if (pool->user.napi_id && + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, pool->user.napi_id)) + goto err_cancel; + + inflight = page_pool_inflight(pool, false); + refsz = PAGE_SIZE << pool->p.order; + if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT, inflight) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT_MEM, + inflight * refsz)) + goto err_cancel; + if (pool->user.detach_time && + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_DETACH_TIME, + pool->user.detach_time)) + goto err_cancel; + + genlmsg_end(rsp, hdr); + + return 0; +err_cancel: + genlmsg_cancel(rsp, hdr); + return -EMSGSIZE; +} + +static void netdev_nl_page_pool_event(const struct page_pool *pool, u32 cmd) +{ + struct genl_info info; + struct sk_buff *ntf; + struct net *net; + + lockdep_assert_held(&page_pools_lock); + + /* 'invisible' page pools don't matter */ + if (hlist_unhashed(&pool->user.list)) + return; + net = dev_net(pool->slow.netdev); + + if (!genl_has_listeners(&netdev_nl_family, net, NETDEV_NLGRP_PAGE_POOL)) + return; + + genl_info_init_ntf(&info, &netdev_nl_family, cmd); + + ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!ntf) + return; + + if (page_pool_nl_fill(ntf, pool, &info)) { + nlmsg_free(ntf); + return; + } + + genlmsg_multicast_netns(&netdev_nl_family, net, ntf, + 0, NETDEV_NLGRP_PAGE_POOL, GFP_KERNEL); +} + +int netdev_nl_page_pool_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + u32 id; + + if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_PAGE_POOL_ID)) + return -EINVAL; + + id = nla_get_uint(info->attrs[NETDEV_A_PAGE_POOL_ID]); + + return netdev_nl_page_pool_get_do(info, id, page_pool_nl_fill); +} + +int netdev_nl_page_pool_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return netdev_nl_page_pool_get_dump(skb, cb, page_pool_nl_fill); +} + +int page_pool_list(struct page_pool *pool) +{ + static u32 id_alloc_next; + int err; + + mutex_lock(&page_pools_lock); + err = xa_alloc_cyclic(&page_pools, &pool->user.id, pool, xa_limit_32b, + &id_alloc_next, GFP_KERNEL); + if (err < 0) + goto err_unlock; + + INIT_HLIST_NODE(&pool->user.list); + if (pool->slow.netdev) { + hlist_add_head(&pool->user.list, + &pool->slow.netdev->page_pools); + pool->user.napi_id = pool->p.napi ? pool->p.napi->napi_id : 0; + + netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_ADD_NTF); + } + + mutex_unlock(&page_pools_lock); + return 0; + +err_unlock: + mutex_unlock(&page_pools_lock); + return err; +} + +void page_pool_detached(struct page_pool *pool) +{ + mutex_lock(&page_pools_lock); + pool->user.detach_time = ktime_get_boottime_seconds(); + netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_CHANGE_NTF); + mutex_unlock(&page_pools_lock); +} + +void page_pool_unlist(struct page_pool *pool) +{ + mutex_lock(&page_pools_lock); + netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_DEL_NTF); + xa_erase(&page_pools, pool->user.id); + if (!hlist_unhashed(&pool->user.list)) + hlist_del(&pool->user.list); + mutex_unlock(&page_pools_lock); +} + +static void page_pool_unreg_netdev_wipe(struct net_device *netdev) +{ + struct page_pool *pool; + struct hlist_node *n; + + mutex_lock(&page_pools_lock); + hlist_for_each_entry_safe(pool, n, &netdev->page_pools, user.list) { + hlist_del_init(&pool->user.list); + pool->slow.netdev = NET_PTR_POISON; + } + mutex_unlock(&page_pools_lock); +} + +static void page_pool_unreg_netdev(struct net_device *netdev) +{ + struct page_pool *pool, *last; + struct net_device *lo; + + lo = dev_net(netdev)->loopback_dev; + + mutex_lock(&page_pools_lock); + last = NULL; + hlist_for_each_entry(pool, &netdev->page_pools, user.list) { + pool->slow.netdev = lo; + netdev_nl_page_pool_event(pool, + NETDEV_CMD_PAGE_POOL_CHANGE_NTF); + last = pool; + } + if (last) + hlist_splice_init(&netdev->page_pools, &last->user.list, + &lo->page_pools); + mutex_unlock(&page_pools_lock); +} + +static int +page_pool_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + + if (event != NETDEV_UNREGISTER) + return NOTIFY_DONE; + + if (hlist_empty(&netdev->page_pools)) + return NOTIFY_OK; + + if (netdev->ifindex != LOOPBACK_IFINDEX) + page_pool_unreg_netdev(netdev); + else + page_pool_unreg_netdev_wipe(netdev); + return NOTIFY_OK; +} + +static struct notifier_block page_pool_netdevice_nb = { + .notifier_call = page_pool_netdevice_event, +}; + +static int __init page_pool_user_init(void) +{ + return register_netdevice_notifier(&page_pool_netdevice_nb); +} + +subsys_initcall(page_pool_user_init); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e8431c6c8490..5f6ed6da3cfc 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -342,8 +342,7 @@ int rtnl_unregister(int protocol, int msgtype) return -ENOENT; } - link = rtnl_dereference(tab[msgindex]); - RCU_INIT_POINTER(tab[msgindex], NULL); + link = rcu_replace_pointer_rtnl(tab[msgindex], NULL); rtnl_unlock(); kfree_rcu(link, rcu); @@ -368,18 +367,13 @@ void rtnl_unregister_all(int protocol) BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); rtnl_lock(); - tab = rtnl_dereference(rtnl_msg_handlers[protocol]); + tab = rcu_replace_pointer_rtnl(rtnl_msg_handlers[protocol], NULL); if (!tab) { rtnl_unlock(); return; } - RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL); for (msgindex = 0; msgindex < RTM_NR_MSGTYPES; msgindex++) { - link = rtnl_dereference(tab[msgindex]); - if (!link) - continue; - - RCU_INIT_POINTER(tab[msgindex], NULL); + link = rcu_replace_pointer_rtnl(tab[msgindex], NULL); kfree_rcu(link, rcu); } rtnl_unlock(); @@ -3849,10 +3843,18 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; err = -ENOBUFS; - nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL); + nskb = nlmsg_new_large(if_nlmsg_size(dev, ext_filter_mask)); if (nskb == NULL) goto out; + /* Synchronize the carrier state so we don't report a state + * that we're not actually going to honour immediately; if + * the driver just did a carrier off->on transition, we can + * only TX if link watch work has run, but without this we'd + * already report carrier on, even if it doesn't work yet. + */ + linkwatch_sync_dev(dev); + err = rtnl_fill_ifinfo(nskb, dev, net, RTM_NEWLINK, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 0, ext_filter_mask, @@ -6408,17 +6410,64 @@ static int rtnl_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, return dev->netdev_ops->ndo_mdb_add(dev, tb, nlh->nlmsg_flags, extack); } +static int rtnl_validate_mdb_entry_del_bulk(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(attr); + struct br_mdb_entry zero_entry = {}; + + if (nla_len(attr) != sizeof(struct br_mdb_entry)) { + NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length"); + return -EINVAL; + } + + if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) { + NL_SET_ERR_MSG(extack, "Unknown entry state"); + return -EINVAL; + } + + if (entry->flags) { + NL_SET_ERR_MSG(extack, "Entry flags cannot be set"); + return -EINVAL; + } + + if (entry->vid >= VLAN_N_VID - 1) { + NL_SET_ERR_MSG(extack, "Invalid entry VLAN id"); + return -EINVAL; + } + + if (memcmp(&entry->addr, &zero_entry.addr, sizeof(entry->addr))) { + NL_SET_ERR_MSG(extack, "Entry address cannot be set"); + return -EINVAL; + } + + return 0; +} + +static const struct nla_policy mdba_del_bulk_policy[MDBA_SET_ENTRY_MAX + 1] = { + [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, + rtnl_validate_mdb_entry_del_bulk, + sizeof(struct br_mdb_entry)), + [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED }, +}; + static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { + bool del_bulk = !!(nlh->nlmsg_flags & NLM_F_BULK); struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; struct net *net = sock_net(skb->sk); struct br_port_msg *bpm; struct net_device *dev; int err; - err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, - MDBA_SET_ENTRY_MAX, mdba_policy, extack); + if (!del_bulk) + err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, + MDBA_SET_ENTRY_MAX, mdba_policy, + extack); + else + err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, + mdba_del_bulk_policy, extack); if (err) return err; @@ -6439,6 +6488,14 @@ static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } + if (del_bulk) { + if (!dev->netdev_ops->ndo_mdb_del_bulk) { + NL_SET_ERR_MSG(extack, "Device does not support MDB bulk deletion"); + return -EOPNOTSUPP; + } + return dev->netdev_ops->ndo_mdb_del_bulk(dev, tb, extack); + } + if (!dev->netdev_ops->ndo_mdb_del) { NL_SET_ERR_MSG(extack, "Device does not support MDB operations"); return -EOPNOTSUPP; @@ -6684,5 +6741,6 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_GETMDB, rtnl_mdb_get, rtnl_mdb_dump, 0); rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, 0); + rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, + RTNL_FLAG_BULK_DEL_SUPPORTED); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 94cc40a6f797..ce5687ddb768 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -890,6 +890,11 @@ static void skb_clone_fraglist(struct sk_buff *skb) skb_get(list); } +static bool is_pp_page(struct page *page) +{ + return (page->pp_magic & ~0x3UL) == PP_SIGNATURE; +} + #if IS_ENABLED(CONFIG_PAGE_POOL) bool napi_pp_put_page(struct page *page, bool napi_safe) { @@ -905,7 +910,7 @@ bool napi_pp_put_page(struct page *page, bool napi_safe) * and page_is_pfmemalloc() is checked in __page_pool_put_page() * to avoid recycling the pfmemalloc page. */ - if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE)) + if (unlikely(!is_pp_page(page))) return false; pp = page->pp; @@ -942,6 +947,37 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe) return napi_pp_put_page(virt_to_page(data), napi_safe); } +/** + * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb + * @skb: page pool aware skb + * + * Increase the fragment reference count (pp_ref_count) of a skb. This is + * intended to gain fragment references only for page pool aware skbs, + * i.e. when skb->pp_recycle is true, and not for fragments in a + * non-pp-recycling skb. It has a fallback to increase references on normal + * pages, as page pool aware skbs may also have normal page fragments. + */ +static int skb_pp_frag_ref(struct sk_buff *skb) +{ + struct skb_shared_info *shinfo; + struct page *head_page; + int i; + + if (!skb->pp_recycle) + return -EINVAL; + + shinfo = skb_shinfo(skb); + + for (i = 0; i < shinfo->nr_frags; i++) { + head_page = compound_head(skb_frag_page(&shinfo->frags[i])); + if (likely(is_pp_page(head_page))) + page_pool_ref_page(head_page); + else + page_ref_inc(head_page); + } + return 0; +} + static void skb_kfree_head(void *head, unsigned int end_offset) { if (end_offset == SKB_SMALL_HEAD_HEADROOM) @@ -5767,17 +5803,12 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, return false; /* In general, avoid mixing page_pool and non-page_pool allocated - * pages within the same SKB. Additionally avoid dealing with clones - * with page_pool pages, in case the SKB is using page_pool fragment - * references (page_pool_alloc_frag()). Since we only take full page - * references for cloned SKBs at the moment that would result in - * inconsistent reference counts. - * In theory we could take full references if @from is cloned and - * !@to->pp_recycle but its tricky (due to potential race with - * the clone disappearing) and rare, so not worth dealing with. + * pages within the same SKB. In theory we could take full + * references if @from is cloned and !@to->pp_recycle but its + * tricky (due to potential race with the clone disappearing) and + * rare, so not worth dealing with. */ - if (to->pp_recycle != from->pp_recycle || - (from->pp_recycle && skb_cloned(from))) + if (to->pp_recycle != from->pp_recycle) return false; if (len <= skb_tailroom(to)) { @@ -5834,8 +5865,10 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, /* if the skb is not cloned this does nothing * since we set nr_frags to 0. */ - for (i = 0; i < from_shinfo->nr_frags; i++) - __skb_frag_ref(&from_shinfo->frags[i]); + if (skb_pp_frag_ref(from)) { + for (i = 0; i < from_shinfo->nr_frags; i++) + __skb_frag_ref(&from_shinfo->frags[i]); + } to->truesize += delta; to->len += len; diff --git a/net/core/sock.c b/net/core/sock.c index fef349dd72fa..446e945f736b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -283,10 +283,6 @@ EXPORT_SYMBOL(sysctl_rmem_max); __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; -/* Maximal space eaten by iovec or ancillary data plus some space */ -int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); -EXPORT_SYMBOL(sysctl_optmem_max); - int sysctl_tstamp_allow_data __read_mostly = 1; DEFINE_STATIC_KEY_FALSE(memalloc_socks_key); @@ -2651,7 +2647,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */ if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) > - READ_ONCE(sysctl_optmem_max)) + READ_ONCE(sock_net(sk)->core.sysctl_optmem_max)) return NULL; skb = alloc_skb(size, priority); @@ -2669,7 +2665,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, */ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) { - int optmem_max = READ_ONCE(sysctl_optmem_max); + int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max); if ((unsigned int)size <= optmem_max && atomic_read(&sk->sk_omem_alloc) + size < optmem_max) { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 03f1edb948d7..0f0cb1465e08 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -509,13 +509,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = proc_dointvec, }, { - .procname = "optmem_max", - .data = &sysctl_optmem_max, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tstamp_allow_data", .data = &sysctl_tstamp_allow_data, .maxlen = sizeof(int), @@ -674,6 +667,14 @@ static struct ctl_table netns_core_table[] = { .proc_handler = proc_dointvec_minmax }, { + .procname = "optmem_max", + .data = &init_net.core.sysctl_optmem_max, + .maxlen = sizeof(int), + .mode = 0644, + .extra1 = SYSCTL_ZERO, + .proc_handler = proc_dointvec_minmax + }, + { .procname = "txrehash", .data = &init_net.core.sysctl_txrehash, .maxlen = sizeof(u8), diff --git a/net/core/xdp.c b/net/core/xdp.c index b6f1d6dab3f2..4869c1c2d8f3 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -736,6 +736,39 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash, return -EOPNOTSUPP; } +/** + * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag + * @ctx: XDP context pointer. + * @vlan_proto: Destination pointer for VLAN Tag protocol identifier (TPID). + * @vlan_tci: Destination pointer for VLAN TCI (VID + DEI + PCP) + * + * In case of success, ``vlan_proto`` contains *Tag protocol identifier (TPID)*, + * usually ``ETH_P_8021Q`` or ``ETH_P_8021AD``, but some networks can use + * custom TPIDs. ``vlan_proto`` is stored in **network byte order (BE)** + * and should be used as follows: + * ``if (vlan_proto == bpf_htons(ETH_P_8021Q)) do_something();`` + * + * ``vlan_tci`` contains the remaining 16 bits of a VLAN tag. + * Driver is expected to provide those in **host byte order (usually LE)**, + * so the bpf program should not perform byte conversion. + * According to 802.1Q standard, *VLAN TCI (Tag control information)* + * is a bit field that contains: + * *VLAN identifier (VID)* that can be read with ``vlan_tci & 0xfff``, + * *Drop eligible indicator (DEI)* - 1 bit, + * *Priority code point (PCP)* - 3 bits. + * For detailed meaning of DEI and PCP, please refer to other sources. + * + * Return: + * * Returns 0 on success or ``-errno`` on error. + * * ``-EOPNOTSUPP`` : device driver doesn't implement kfunc + * * ``-ENODATA`` : VLAN tag was not stripped or is not available + */ +__bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, + __be16 *vlan_proto, u16 *vlan_tci) +{ + return -EOPNOTSUPP; +} + __bpf_kfunc_end_defs(); BTF_SET8_START(xdp_metadata_kfunc_ids) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4550b680665a..06d7324276ec 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -669,7 +669,7 @@ discard: ipv6_pktoptions: if (!((1 << sk->sk_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) { if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) - np->mcast_oif = inet6_iif(opt_skb); + WRITE_ONCE(np->mcast_oif, inet6_iif(opt_skb)); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) diff --git a/net/devlink/core.c b/net/devlink/core.c index 6984877e9f10..4275a2bc6d8e 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -503,14 +503,14 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) * all devlink instances from this namespace into init_net. */ devlinks_xa_for_each_registered_get(net, index, devlink) { - devl_lock(devlink); + devl_dev_lock(devlink, true); err = 0; if (devl_is_registered(devlink)) err = devlink_reload(devlink, &init_net, DEVLINK_RELOAD_ACTION_DRIVER_REINIT, DEVLINK_RELOAD_LIMIT_UNSPEC, &actions_performed, NULL); - devl_unlock(devlink); + devl_dev_unlock(devlink, true); devlink_put(devlink); if (err && err != -EOPNOTSUPP) pr_warn("Failed to reload devlink instance into init_net\n"); diff --git a/net/devlink/dev.c b/net/devlink/dev.c index 4fc7adb32663..19dbf540748a 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -4,6 +4,7 @@ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> */ +#include <linux/device.h> #include <net/genetlink.h> #include <net/sock.h> #include "devl_internal.h" @@ -201,7 +202,10 @@ static void devlink_notify(struct devlink *devlink, enum devlink_command cmd) int err; WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL); - WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)); + WARN_ON(!devl_is_registered(devlink)); + + if (!devlink_nl_notify_need(devlink)) + return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -213,8 +217,7 @@ static void devlink_notify(struct devlink *devlink, enum devlink_command cmd) return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info) @@ -424,6 +427,18 @@ static void devlink_reload_netns_change(struct devlink *devlink, devlink_rel_nested_in_notify(devlink); } +static void devlink_reload_reinit_sanity_check(struct devlink *devlink) +{ + WARN_ON(!list_empty(&devlink->trap_policer_list)); + WARN_ON(!list_empty(&devlink->trap_group_list)); + WARN_ON(!list_empty(&devlink->trap_list)); + WARN_ON(!list_empty(&devlink->dpipe_table_list)); + WARN_ON(!list_empty(&devlink->sb_list)); + WARN_ON(!list_empty(&devlink->rate_list)); + WARN_ON(!list_empty(&devlink->linecard_list)); + WARN_ON(!xa_empty(&devlink->ports)); +} + int devlink_reload(struct devlink *devlink, struct net *dest_net, enum devlink_reload_action action, enum devlink_reload_limit limit, @@ -433,6 +448,13 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net, struct net *curr_net; int err; + /* Make sure the reload operations are invoked with the device lock + * held to allow drivers to trigger functionality that expects it + * (e.g., PCI reset) and to close possible races between these + * operations and probe/remove. + */ + device_lock_assert(devlink->dev); + memcpy(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats)); @@ -444,8 +466,10 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net, if (dest_net && !net_eq(dest_net, curr_net)) devlink_reload_netns_change(devlink, curr_net, dest_net); - if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT) + if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT) { devlink_params_driverinit_load_new(devlink); + devlink_reload_reinit_sanity_check(devlink); + } err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack); devlink_reload_failed_set(devlink, !!err); @@ -977,7 +1001,7 @@ static void __devlink_flash_update_notify(struct devlink *devlink, cmd != DEVLINK_CMD_FLASH_UPDATE_END && cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -988,8 +1012,7 @@ static void __devlink_flash_update_notify(struct devlink *devlink, if (err) goto out_free_msg; - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); return; out_free_msg: diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 183dbe3807ab..c7a8e13f917c 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -3,6 +3,7 @@ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> */ +#include <linux/device.h> #include <linux/etherdevice.h> #include <linux/mutex.h> #include <linux/netdevice.h> @@ -90,10 +91,29 @@ extern struct genl_family devlink_nl_family; struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp); +static inline bool __devl_is_registered(struct devlink *devlink) +{ + return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); +} + static inline bool devl_is_registered(struct devlink *devlink) { devl_assert_locked(devlink); - return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); + return __devl_is_registered(devlink); +} + +static inline void devl_dev_lock(struct devlink *devlink, bool dev_lock) +{ + if (dev_lock) + device_lock(devlink->dev); + devl_lock(devlink); +} + +static inline void devl_dev_unlock(struct devlink *devlink, bool dev_lock) +{ + devl_unlock(devlink); + if (dev_lock) + device_unlock(devlink->dev); } typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index); @@ -111,9 +131,6 @@ int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink, bool *msg_updated); /* Netlink */ -#define DEVLINK_NL_FLAG_NEED_PORT BIT(0) -#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) - enum devlink_multicast_groups { DEVLINK_MCGRP_CONFIG, }; @@ -140,7 +157,8 @@ typedef int devlink_nl_dump_one_func_t(struct sk_buff *msg, int flags); struct devlink * -devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs); +devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs, + bool dev_lock); int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb, devlink_nl_dump_one_func_t *dump_one); @@ -167,6 +185,58 @@ int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, struct devlink *devlink, int attrtype); int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info); +static inline bool devlink_nl_notify_need(struct devlink *devlink) +{ + return genl_has_listeners(&devlink_nl_family, devlink_net(devlink), + DEVLINK_MCGRP_CONFIG); +} + +struct devlink_obj_desc { + struct rcu_head rcu; + const char *bus_name; + const char *dev_name; + unsigned int port_index; + bool port_index_valid; + long data[]; +}; + +static inline void devlink_nl_obj_desc_init(struct devlink_obj_desc *desc, + struct devlink *devlink) +{ + memset(desc, 0, sizeof(*desc)); + desc->bus_name = devlink->dev->bus->name; + desc->dev_name = dev_name(devlink->dev); +} + +static inline void devlink_nl_obj_desc_port_set(struct devlink_obj_desc *desc, + struct devlink_port *devlink_port) +{ + desc->port_index = devlink_port->index; + desc->port_index_valid = true; +} + +int devlink_nl_notify_filter(struct sock *dsk, struct sk_buff *skb, void *data); + +static inline void devlink_nl_notify_send_desc(struct devlink *devlink, + struct sk_buff *msg, + struct devlink_obj_desc *desc) +{ + genlmsg_multicast_netns_filtered(&devlink_nl_family, + devlink_net(devlink), + msg, 0, DEVLINK_MCGRP_CONFIG, + GFP_KERNEL, + devlink_nl_notify_filter, desc); +} + +static inline void devlink_nl_notify_send(struct devlink *devlink, + struct sk_buff *msg) +{ + struct devlink_obj_desc desc; + + devlink_nl_obj_desc_init(&desc, devlink); + devlink_nl_notify_send_desc(devlink, msg, &desc); +} + /* Notify */ void devlink_notify_register(struct devlink *devlink); void devlink_notify_unregister(struct devlink *devlink); diff --git a/net/devlink/health.c b/net/devlink/health.c index 695df61f8ac2..acb8c0e174bb 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -490,12 +490,16 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter, enum devlink_command cmd) { struct devlink *devlink = reporter->devlink; + struct devlink_obj_desc desc; struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER); ASSERT_DEVLINK_REGISTERED(devlink); + if (!devlink_nl_notify_need(devlink)) + return; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; @@ -506,8 +510,10 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, - 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_obj_desc_init(&desc, devlink); + if (reporter->devlink_port) + devlink_nl_obj_desc_port_set(&desc, reporter->devlink_port); + devlink_nl_notify_send_desc(devlink, msg, &desc); } void @@ -1151,7 +1157,8 @@ devlink_health_reporter_get_from_cb_lock(struct netlink_callback *cb) struct nlattr **attrs = info->attrs; struct devlink *devlink; - devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs); + devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs, + false); if (IS_ERR(devlink)) return NULL; diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c index 2f1c317b64cd..67f70a621d27 100644 --- a/net/devlink/linecard.c +++ b/net/devlink/linecard.c @@ -136,7 +136,7 @@ static void devlink_linecard_notify(struct devlink_linecard *linecard, WARN_ON(cmd != DEVLINK_CMD_LINECARD_NEW && cmd != DEVLINK_CMD_LINECARD_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + if (!__devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -150,8 +150,7 @@ static void devlink_linecard_notify(struct devlink_linecard *linecard, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_linecards_notify_register(struct devlink *devlink) diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index d0b90ebc8b15..499885c8b9ca 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -9,10 +9,127 @@ #include "devl_internal.h" +#define DEVLINK_NL_FLAG_NEED_PORT BIT(0) +#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) +#define DEVLINK_NL_FLAG_NEED_DEV_LOCK BIT(2) + static const struct genl_multicast_group devlink_nl_mcgrps[] = { [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME }, }; +struct devlink_nl_sock_priv { + struct devlink_obj_desc __rcu *flt; + spinlock_t flt_lock; /* Protects flt. */ +}; + +static void devlink_nl_sock_priv_init(void *priv) +{ + struct devlink_nl_sock_priv *sk_priv = priv; + + spin_lock_init(&sk_priv->flt_lock); +} + +static void devlink_nl_sock_priv_destroy(void *priv) +{ + struct devlink_nl_sock_priv *sk_priv = priv; + struct devlink_obj_desc *flt; + + flt = rcu_dereference_protected(sk_priv->flt, true); + kfree_rcu(flt, rcu); +} + +int devlink_nl_notify_filter_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_nl_sock_priv *sk_priv; + struct nlattr **attrs = info->attrs; + struct devlink_obj_desc *flt; + size_t data_offset = 0; + size_t data_size = 0; + char *pos; + + if (attrs[DEVLINK_ATTR_BUS_NAME]) + data_size = size_add(data_size, + nla_len(attrs[DEVLINK_ATTR_BUS_NAME]) + 1); + if (attrs[DEVLINK_ATTR_DEV_NAME]) + data_size = size_add(data_size, + nla_len(attrs[DEVLINK_ATTR_DEV_NAME]) + 1); + + flt = kzalloc(size_add(sizeof(*flt), data_size), GFP_KERNEL); + if (!flt) + return -ENOMEM; + + pos = (char *) flt->data; + if (attrs[DEVLINK_ATTR_BUS_NAME]) { + data_offset += nla_strscpy(pos, + attrs[DEVLINK_ATTR_BUS_NAME], + data_size) + 1; + flt->bus_name = pos; + pos += data_offset; + } + if (attrs[DEVLINK_ATTR_DEV_NAME]) { + nla_strscpy(pos, attrs[DEVLINK_ATTR_DEV_NAME], + data_size - data_offset); + flt->dev_name = pos; + } + + if (attrs[DEVLINK_ATTR_PORT_INDEX]) { + flt->port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]); + flt->port_index_valid = true; + } + + /* Don't attach empty filter. */ + if (!flt->bus_name && !flt->dev_name && !flt->port_index_valid) { + kfree(flt); + flt = NULL; + } + + sk_priv = genl_sk_priv_get(&devlink_nl_family, NETLINK_CB(skb).sk); + if (IS_ERR(sk_priv)) { + kfree(flt); + return PTR_ERR(sk_priv); + } + spin_lock(&sk_priv->flt_lock); + flt = rcu_replace_pointer(sk_priv->flt, flt, + lockdep_is_held(&sk_priv->flt_lock)); + spin_unlock(&sk_priv->flt_lock); + kfree_rcu(flt, rcu); + return 0; +} + +static bool devlink_obj_desc_match(const struct devlink_obj_desc *desc, + const struct devlink_obj_desc *flt) +{ + if (desc->bus_name && flt->bus_name && + strcmp(desc->bus_name, flt->bus_name)) + return false; + if (desc->dev_name && flt->dev_name && + strcmp(desc->dev_name, flt->dev_name)) + return false; + if (desc->port_index_valid && flt->port_index_valid && + desc->port_index != flt->port_index) + return false; + return true; +} + +int devlink_nl_notify_filter(struct sock *dsk, struct sk_buff *skb, void *data) +{ + struct devlink_obj_desc *desc = data; + struct devlink_nl_sock_priv *sk_priv; + struct devlink_obj_desc *flt; + int ret = 0; + + rcu_read_lock(); + sk_priv = __genl_sk_priv_get(&devlink_nl_family, dsk); + if (!IS_ERR_OR_NULL(sk_priv)) { + flt = rcu_dereference(sk_priv->flt); + if (flt) + ret = !devlink_obj_desc_match(desc, flt); + } + rcu_read_unlock(); + return ret; +} + int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, struct devlink *devlink, int attrtype) { @@ -61,7 +178,8 @@ int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info) } struct devlink * -devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs) +devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs, + bool dev_lock) { struct devlink *devlink; unsigned long index; @@ -75,12 +193,12 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs) devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]); devlinks_xa_for_each_registered_get(net, index, devlink) { - devl_lock(devlink); + devl_dev_lock(devlink, dev_lock); if (devl_is_registered(devlink) && strcmp(devlink->dev->bus->name, busname) == 0 && strcmp(dev_name(devlink->dev), devname) == 0) return devlink; - devl_unlock(devlink); + devl_dev_unlock(devlink, dev_lock); devlink_put(devlink); } @@ -90,11 +208,13 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs) static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info, u8 flags) { + bool dev_lock = flags & DEVLINK_NL_FLAG_NEED_DEV_LOCK; struct devlink_port *devlink_port; struct devlink *devlink; int err; - devlink = devlink_get_from_attrs_lock(genl_info_net(info), info->attrs); + devlink = devlink_get_from_attrs_lock(genl_info_net(info), info->attrs, + dev_lock); if (IS_ERR(devlink)) return PTR_ERR(devlink); @@ -114,7 +234,7 @@ static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info, return 0; unlock: - devl_unlock(devlink); + devl_dev_unlock(devlink, dev_lock); devlink_put(devlink); return err; } @@ -131,6 +251,12 @@ int devlink_nl_pre_doit_port(const struct genl_split_ops *ops, return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_PORT); } +int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEV_LOCK); +} + int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info) @@ -138,16 +264,30 @@ int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops, return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT); } -void devlink_nl_post_doit(const struct genl_split_ops *ops, - struct sk_buff *skb, struct genl_info *info) +static void __devlink_nl_post_doit(struct sk_buff *skb, struct genl_info *info, + u8 flags) { + bool dev_lock = flags & DEVLINK_NL_FLAG_NEED_DEV_LOCK; struct devlink *devlink; devlink = info->user_ptr[0]; - devl_unlock(devlink); + devl_dev_unlock(devlink, dev_lock); devlink_put(devlink); } +void devlink_nl_post_doit(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + __devlink_nl_post_doit(skb, info, 0); +} + +void +devlink_nl_post_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + __devlink_nl_post_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEV_LOCK); +} + static int devlink_nl_inst_single_dumpit(struct sk_buff *msg, struct netlink_callback *cb, int flags, devlink_nl_dump_one_func_t *dump_one, @@ -156,7 +296,7 @@ static int devlink_nl_inst_single_dumpit(struct sk_buff *msg, struct devlink *devlink; int err; - devlink = devlink_get_from_attrs_lock(sock_net(msg->sk), attrs); + devlink = devlink_get_from_attrs_lock(sock_net(msg->sk), attrs, false); if (IS_ERR(devlink)) return PTR_ERR(devlink); err = dump_one(msg, devlink, cb, flags | NLM_F_DUMP_FILTERED); @@ -229,4 +369,7 @@ struct genl_family devlink_nl_family __ro_after_init = { .resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1, .mcgrps = devlink_nl_mcgrps, .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps), + .sock_priv_size = sizeof(struct devlink_nl_sock_priv), + .sock_priv_init = devlink_nl_sock_priv_init, + .sock_priv_destroy = devlink_nl_sock_priv_destroy, }; diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 788dfdc498a9..c81cf2dd154f 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -560,8 +560,15 @@ static const struct nla_policy devlink_selftests_run_nl_policy[DEVLINK_ATTR_SELF [DEVLINK_ATTR_SELFTESTS] = NLA_POLICY_NESTED(devlink_dl_selftest_id_nl_policy), }; +/* DEVLINK_CMD_NOTIFY_FILTER_SET - do */ +static const struct nla_policy devlink_notify_filter_set_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, +}; + /* Ops table for devlink */ -const struct genl_split_ops devlink_nl_ops[73] = { +const struct genl_split_ops devlink_nl_ops[74] = { { .cmd = DEVLINK_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -846,9 +853,9 @@ const struct genl_split_ops devlink_nl_ops[73] = { { .cmd = DEVLINK_CMD_RELOAD, .validate = GENL_DONT_VALIDATE_STRICT, - .pre_doit = devlink_nl_pre_doit, + .pre_doit = devlink_nl_pre_doit_dev_lock, .doit = devlink_nl_reload_doit, - .post_doit = devlink_nl_post_doit, + .post_doit = devlink_nl_post_doit_dev_lock, .policy = devlink_reload_nl_policy, .maxattr = DEVLINK_ATTR_RELOAD_LIMITS, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, @@ -1233,4 +1240,11 @@ const struct genl_split_ops devlink_nl_ops[73] = { .maxattr = DEVLINK_ATTR_SELFTESTS, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, + { + .cmd = DEVLINK_CMD_NOTIFY_FILTER_SET, + .doit = devlink_nl_notify_filter_set_doit, + .policy = devlink_notify_filter_set_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_INDEX, + .flags = GENL_CMD_CAP_DO, + }, }; diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h index 0e9e89c31c31..8f2bd50ddf5e 100644 --- a/net/devlink/netlink_gen.h +++ b/net/devlink/netlink_gen.h @@ -16,18 +16,23 @@ extern const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_F extern const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1]; /* Ops table for devlink */ -extern const struct genl_split_ops devlink_nl_ops[73]; +extern const struct genl_split_ops devlink_nl_ops[74]; int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); int devlink_nl_pre_doit_port(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); +int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); void devlink_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); +void +devlink_nl_post_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); @@ -137,5 +142,7 @@ int devlink_nl_selftests_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_selftests_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int devlink_nl_selftests_run_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_notify_filter_set_doit(struct sk_buff *skb, + struct genl_info *info); #endif /* _LINUX_DEVLINK_GEN_H */ diff --git a/net/devlink/param.c b/net/devlink/param.c index d74df09311a9..22bc3b500518 100644 --- a/net/devlink/param.c +++ b/net/devlink/param.c @@ -343,7 +343,7 @@ static void devlink_param_notify(struct devlink *devlink, * will replay the notifications if the params are added/removed * outside of the lifetime of the instance. */ - if (!devl_is_registered(devlink)) + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -356,8 +356,7 @@ static void devlink_param_notify(struct devlink *devlink, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } static void devlink_params_notify(struct devlink *devlink, diff --git a/net/devlink/port.c b/net/devlink/port.c index 7634f187fa50..62e54e152ecf 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -507,12 +507,13 @@ static void devlink_port_notify(struct devlink_port *devlink_port, enum devlink_command cmd) { struct devlink *devlink = devlink_port->devlink; + struct devlink_obj_desc desc; struct sk_buff *msg; int err; WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + if (!__devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -525,8 +526,9 @@ static void devlink_port_notify(struct devlink_port *devlink_port, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, - 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_obj_desc_init(&desc, devlink); + devlink_nl_obj_desc_port_set(&desc, devlink_port); + devlink_nl_notify_send_desc(devlink, msg, &desc); } static void devlink_ports_notify(struct devlink *devlink, diff --git a/net/devlink/rate.c b/net/devlink/rate.c index 94b289b93ff2..7139e67e93ae 100644 --- a/net/devlink/rate.c +++ b/net/devlink/rate.c @@ -146,7 +146,7 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate, WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -159,8 +159,7 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, - 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_rates_notify_register(struct devlink *devlink) diff --git a/net/devlink/region.c b/net/devlink/region.c index 0aab7b82d678..7319127c5913 100644 --- a/net/devlink/region.c +++ b/net/devlink/region.c @@ -234,15 +234,15 @@ static void devlink_nl_region_notify(struct devlink_region *region, struct sk_buff *msg; WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + + if (!__devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0); if (IS_ERR(msg)) return; - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, - 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_regions_notify_register(struct devlink *devlink) @@ -883,7 +883,8 @@ int devlink_nl_region_read_dumpit(struct sk_buff *skb, start_offset = state->start_offset; - devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs); + devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs, + false); if (IS_ERR(devlink)) return PTR_ERR(devlink); diff --git a/net/devlink/trap.c b/net/devlink/trap.c index c26313e7ca08..5d18c7424df1 100644 --- a/net/devlink/trap.c +++ b/net/devlink/trap.c @@ -1173,7 +1173,8 @@ devlink_trap_group_notify(struct devlink *devlink, WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW && cmd != DEVLINK_CMD_TRAP_GROUP_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -1187,8 +1188,7 @@ devlink_trap_group_notify(struct devlink *devlink, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_trap_groups_notify_register(struct devlink *devlink) @@ -1234,7 +1234,8 @@ static void devlink_trap_notify(struct devlink *devlink, WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW && cmd != DEVLINK_CMD_TRAP_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -1247,8 +1248,7 @@ static void devlink_trap_notify(struct devlink *devlink, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_traps_notify_register(struct devlink *devlink) @@ -1710,7 +1710,8 @@ devlink_trap_policer_notify(struct devlink *devlink, WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW && cmd != DEVLINK_CMD_TRAP_POLICER_DEL); - if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)) + + if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -1724,8 +1725,7 @@ devlink_trap_policer_notify(struct devlink *devlink, return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + devlink_nl_notify_send(devlink, msg); } void devlink_trap_policers_notify_register(struct devlink *devlink) diff --git a/net/dns_resolver/Kconfig b/net/dns_resolver/Kconfig index 155b06163409..7c2dba273e35 100644 --- a/net/dns_resolver/Kconfig +++ b/net/dns_resolver/Kconfig @@ -23,6 +23,6 @@ config DNS_RESOLVER information. To compile this as a module, choose M here: the module will be called - dnsresolver. + dns_resolver. If unsure, say N. diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index 4da5bad1a7aa..a019226ec6d2 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -23,7 +23,6 @@ #define RTL4_A_NAME "rtl4a" #define RTL4_A_HDR_LEN 4 -#define RTL4_A_ETHERTYPE 0x8899 #define RTL4_A_PROTOCOL_SHIFT 12 /* * 0x1 = Realtek Remote Control protocol (RRCP) @@ -54,7 +53,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, /* Set Ethertype */ p = (__be16 *)tag; - *p = htons(RTL4_A_ETHERTYPE); + *p = htons(ETH_P_REALTEK); out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT); /* The lower bits indicate the port number */ @@ -82,7 +81,7 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb, tag = dsa_etype_header_pos_rx(skb); p = (__be16 *)tag; etype = ntohs(*p); - if (etype != RTL4_A_ETHERTYPE) { + if (etype != ETH_P_REALTEK) { /* Not custom, just pass through */ netdev_dbg(dev, "non-realtek ethertype 0x%04x\n", etype); return skb; diff --git a/net/ethtool/common.c b/net/ethtool/common.c index b4419fb6df6a..6b2a360dcdf0 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -589,8 +589,8 @@ err_free_info: int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max) { + struct ethtool_rxfh_param rxfh = {}; u32 dev_size, current_max = 0; - u32 *indir; int ret; if (!dev->ethtool_ops->get_rxfh_indir_size || @@ -600,21 +600,21 @@ int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max) if (dev_size == 0) return -EOPNOTSUPP; - indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); - if (!indir) + rxfh.indir = kcalloc(dev_size, sizeof(rxfh.indir[0]), GFP_USER); + if (!rxfh.indir) return -ENOMEM; - ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); + ret = dev->ethtool_ops->get_rxfh(dev, &rxfh); if (ret) goto out; while (dev_size--) - current_max = max(current_max, indir[dev_size]); + current_max = max(current_max, rxfh.indir[dev_size]); *max = current_max; out: - kfree(indir); + kfree(rxfh.indir); return ret; } @@ -661,6 +661,12 @@ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index) } EXPORT_SYMBOL(ethtool_get_phc_vclocks); +int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info) +{ + return __ethtool_get_ts_info(dev, info); +} +EXPORT_SYMBOL(ethtool_get_ts_info_by_layer); + const struct ethtool_phy_ops *ethtool_phy_ops; void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 0b0ce4f81c01..86d47425038b 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -58,6 +58,9 @@ static struct devlink *netdev_to_devlink_get(struct net_device *dev) u32 ethtool_op_get_link(struct net_device *dev) { + /* Synchronize carrier state with link watch, see also rtnl_getlink() */ + linkwatch_sync_dev(dev); + return netif_carrier_ok(dev) ? 1 : 0; } EXPORT_SYMBOL(ethtool_op_get_link); @@ -969,18 +972,35 @@ static int ethtool_rxnfc_copy_to_user(void __user *useraddr, static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, u32 cmd, void __user *useraddr) { + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_rxfh_param rxfh = {}; struct ethtool_rxnfc info; size_t info_size = sizeof(info); int rc; - if (!dev->ethtool_ops->set_rxnfc) + if (!ops->set_rxnfc || !ops->get_rxfh) return -EOPNOTSUPP; rc = ethtool_rxnfc_copy_struct(cmd, &info, &info_size, useraddr); if (rc) return rc; - rc = dev->ethtool_ops->set_rxnfc(dev, &info); + rc = ops->get_rxfh(dev, &rxfh); + if (rc) + return rc; + + /* Sanity check: if symmetric-xor is set, then: + * 1 - no other fields besides IP src/dst and/or L4 src/dst + * 2 - If src is set, dst must also be set + */ + if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && + ((info.data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) || + (!!(info.data & RXH_IP_SRC) ^ !!(info.data & RXH_IP_DST)) || + (!!(info.data & RXH_L4_B_0_1) ^ !!(info.data & RXH_L4_B_2_3)))) + return -EINVAL; + + rc = ops->set_rxnfc(dev, &info); if (rc) return rc; @@ -1058,15 +1078,15 @@ EXPORT_SYMBOL(netdev_rss_key_fill); static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, void __user *useraddr) { - u32 user_size, dev_size; - u32 *indir; + struct ethtool_rxfh_param rxfh = {}; + u32 user_size; int ret; if (!dev->ethtool_ops->get_rxfh_indir_size || !dev->ethtool_ops->get_rxfh) return -EOPNOTSUPP; - dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); - if (dev_size == 0) + rxfh.indir_size = dev->ethtool_ops->get_rxfh_indir_size(dev); + if (rxfh.indir_size == 0) return -EOPNOTSUPP; if (copy_from_user(&user_size, @@ -1075,41 +1095,41 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, return -EFAULT; if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh_indir, size), - &dev_size, sizeof(dev_size))) + &rxfh.indir_size, sizeof(rxfh.indir_size))) return -EFAULT; /* If the user buffer size is 0, this is just a query for the * device table size. Otherwise, if it's smaller than the * device table size it's an error. */ - if (user_size < dev_size) + if (user_size < rxfh.indir_size) return user_size == 0 ? 0 : -EINVAL; - indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); - if (!indir) + rxfh.indir = kcalloc(rxfh.indir_size, sizeof(rxfh.indir[0]), GFP_USER); + if (!rxfh.indir) return -ENOMEM; - ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); + ret = dev->ethtool_ops->get_rxfh(dev, &rxfh); if (ret) goto out; - if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh_indir, ring_index[0]), - indir, dev_size * sizeof(indir[0]))) + rxfh.indir, rxfh.indir_size * sizeof(*rxfh.indir))) ret = -EFAULT; out: - kfree(indir); + kfree(rxfh.indir); return ret; } static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, void __user *useraddr) { - struct ethtool_rxnfc rx_rings; - u32 user_size, dev_size, i; - u32 *indir; const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_rxfh_param rxfh_dev = {}; + struct netlink_ext_ack *extack = NULL; + struct ethtool_rxnfc rx_rings; + u32 user_size, i; int ret; u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]); @@ -1117,8 +1137,8 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, !ops->get_rxnfc) return -EOPNOTSUPP; - dev_size = ops->get_rxfh_indir_size(dev); - if (dev_size == 0) + rxfh_dev.indir_size = ops->get_rxfh_indir_size(dev); + if (rxfh_dev.indir_size == 0) return -EOPNOTSUPP; if (copy_from_user(&user_size, @@ -1126,11 +1146,12 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, sizeof(user_size))) return -EFAULT; - if (user_size != 0 && user_size != dev_size) + if (user_size != 0 && user_size != rxfh_dev.indir_size) return -EINVAL; - indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); - if (!indir) + rxfh_dev.indir = kcalloc(rxfh_dev.indir_size, + sizeof(rxfh_dev.indir[0]), GFP_USER); + if (!rxfh_dev.indir) return -ENOMEM; rx_rings.cmd = ETHTOOL_GRXRINGS; @@ -1139,18 +1160,21 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, goto out; if (user_size == 0) { - for (i = 0; i < dev_size; i++) + u32 *indir = rxfh_dev.indir; + + for (i = 0; i < rxfh_dev.indir_size; i++) indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); } else { - ret = ethtool_copy_validate_indir(indir, + ret = ethtool_copy_validate_indir(rxfh_dev.indir, useraddr + ringidx_offset, &rx_rings, - dev_size); + rxfh_dev.indir_size); if (ret) goto out; } - ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE); + rxfh_dev.hfunc = ETH_RSS_HASH_NO_CHANGE; + ret = ops->set_rxfh(dev, &rxfh_dev, extack); if (ret) goto out; @@ -1161,32 +1185,29 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, dev->priv_flags |= IFF_RXFH_CONFIGURED; out: - kfree(indir); + kfree(rxfh_dev.indir); return ret; } static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, void __user *useraddr) { - int ret; const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_rxfh_param rxfh_dev = {}; u32 user_indir_size, user_key_size; - u32 dev_indir_size = 0, dev_key_size = 0; struct ethtool_rxfh rxfh; - u32 total_size; u32 indir_bytes; - u32 *indir = NULL; - u8 dev_hfunc = 0; - u8 *hkey = NULL; u8 *rss_config; + u32 total_size; + int ret; if (!ops->get_rxfh) return -EOPNOTSUPP; if (ops->get_rxfh_indir_size) - dev_indir_size = ops->get_rxfh_indir_size(dev); + rxfh_dev.indir_size = ops->get_rxfh_indir_size(dev); if (ops->get_rxfh_key_size) - dev_key_size = ops->get_rxfh_key_size(dev); + rxfh_dev.key_size = ops->get_rxfh_key_size(dev); if (copy_from_user(&rxfh, useraddr, sizeof(rxfh))) return -EFAULT; @@ -1194,44 +1215,41 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, user_key_size = rxfh.key_size; /* Check that reserved fields are 0 for now */ - if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32) + if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32) return -EINVAL; /* Most drivers don't handle rss_context, check it's 0 as well */ - if (rxfh.rss_context && !ops->get_rxfh_context) + if (rxfh.rss_context && !ops->cap_rss_ctx_supported) return -EOPNOTSUPP; - rxfh.indir_size = dev_indir_size; - rxfh.key_size = dev_key_size; + rxfh.indir_size = rxfh_dev.indir_size; + rxfh.key_size = rxfh_dev.key_size; if (copy_to_user(useraddr, &rxfh, sizeof(rxfh))) return -EFAULT; - if ((user_indir_size && (user_indir_size != dev_indir_size)) || - (user_key_size && (user_key_size != dev_key_size))) + if ((user_indir_size && user_indir_size != rxfh_dev.indir_size) || + (user_key_size && user_key_size != rxfh_dev.key_size)) return -EINVAL; - indir_bytes = user_indir_size * sizeof(indir[0]); + indir_bytes = user_indir_size * sizeof(rxfh_dev.indir[0]); total_size = indir_bytes + user_key_size; rss_config = kzalloc(total_size, GFP_USER); if (!rss_config) return -ENOMEM; if (user_indir_size) - indir = (u32 *)rss_config; + rxfh_dev.indir = (u32 *)rss_config; if (user_key_size) - hkey = rss_config + indir_bytes; + rxfh_dev.key = rss_config + indir_bytes; - if (rxfh.rss_context) - ret = dev->ethtool_ops->get_rxfh_context(dev, indir, hkey, - &dev_hfunc, - rxfh.rss_context); - else - ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc); + rxfh_dev.rss_context = rxfh.rss_context; + + ret = dev->ethtool_ops->get_rxfh(dev, &rxfh_dev); if (ret) goto out; if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, hfunc), - &dev_hfunc, sizeof(rxfh.hfunc))) { + &rxfh_dev.hfunc, sizeof(rxfh.hfunc))) { ret = -EFAULT; } else if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_config[0]), @@ -1247,16 +1265,16 @@ out: static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, void __user *useraddr) { - int ret; + u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); const struct ethtool_ops *ops = dev->ethtool_ops; + u32 dev_indir_size = 0, dev_key_size = 0, i; + struct ethtool_rxfh_param rxfh_dev = {}; + struct netlink_ext_ack *extack = NULL; struct ethtool_rxnfc rx_rings; struct ethtool_rxfh rxfh; - u32 dev_indir_size = 0, dev_key_size = 0, i; - u32 *indir = NULL, indir_bytes = 0; - u8 *hkey = NULL; + u32 indir_bytes = 0; u8 *rss_config; - u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); - bool delete = false; + int ret; if (!ops->get_rxnfc || !ops->set_rxfh) return -EOPNOTSUPP; @@ -1270,10 +1288,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, return -EFAULT; /* Check that reserved fields are 0 for now */ - if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32) + if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32) return -EINVAL; /* Most drivers don't handle rss_context, check it's 0 as well */ - if (rxfh.rss_context && !ops->set_rxfh_context) + if (rxfh.rss_context && !ops->cap_rss_ctx_supported) + return -EOPNOTSUPP; + /* Check input data transformation capabilities */ + if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && + !ops->cap_rss_sym_xor_supported) return -EOPNOTSUPP; /* If either indir, hash key or function is valid, proceed further. @@ -1288,7 +1310,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, return -EINVAL; if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) - indir_bytes = dev_indir_size * sizeof(indir[0]); + indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]); rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER); if (!rss_config) @@ -1305,8 +1327,9 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, */ if (rxfh.indir_size && rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) { - indir = (u32 *)rss_config; - ret = ethtool_copy_validate_indir(indir, + rxfh_dev.indir = (u32 *)rss_config; + rxfh_dev.indir_size = dev_indir_size; + ret = ethtool_copy_validate_indir(rxfh_dev.indir, useraddr + rss_cfg_offset, &rx_rings, rxfh.indir_size); @@ -1314,17 +1337,22 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, goto out; } else if (rxfh.indir_size == 0) { if (rxfh.rss_context == 0) { - indir = (u32 *)rss_config; + u32 *indir; + + rxfh_dev.indir = (u32 *)rss_config; + rxfh_dev.indir_size = dev_indir_size; + indir = rxfh_dev.indir; for (i = 0; i < dev_indir_size; i++) indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); } else { - delete = true; + rxfh_dev.rss_delete = true; } } if (rxfh.key_size) { - hkey = rss_config + indir_bytes; - if (copy_from_user(hkey, + rxfh_dev.key_size = dev_key_size; + rxfh_dev.key = rss_config + indir_bytes; + if (copy_from_user(rxfh_dev.key, useraddr + rss_cfg_offset + indir_bytes, rxfh.key_size)) { ret = -EFAULT; @@ -1332,19 +1360,19 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } } - if (rxfh.rss_context) - ret = ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc, - &rxfh.rss_context, delete); - else - ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc); + rxfh_dev.hfunc = rxfh.hfunc; + rxfh_dev.rss_context = rxfh.rss_context; + rxfh_dev.input_xfrm = rxfh.input_xfrm; + + ret = ops->set_rxfh(dev, &rxfh_dev, extack); if (ret) goto out; if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context), - &rxfh.rss_context, sizeof(rxfh.rss_context))) + &rxfh_dev.rss_context, sizeof(rxfh_dev.rss_context))) ret = -EFAULT; - if (!rxfh.rss_context) { + if (!rxfh_dev.rss_context) { /* indicate whether rxfh was set to default */ if (rxfh.indir_size == 0) dev->priv_flags &= ~IFF_RXFH_CONFIGURED; @@ -1991,6 +2019,13 @@ __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...) } EXPORT_SYMBOL(ethtool_sprintf); +void ethtool_puts(u8 **data, const char *str) +{ + strscpy(*data, str, ETH_GSTRING_LEN); + *data += ETH_GSTRING_LEN; +} +EXPORT_SYMBOL(ethtool_puts); + static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) { struct ethtool_value id; diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index fb09f774ea01..b7865a14fdf8 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -124,6 +124,8 @@ const struct nla_policy ethnl_rings_set_policy[] = { [ETHTOOL_A_RINGS_RX_JUMBO] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 }, [ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1), + [ETHTOOL_A_RINGS_TCP_DATA_SPLIT] = + NLA_POLICY_MAX(NLA_U8, ETHTOOL_TCP_DATA_SPLIT_ENABLED), [ETHTOOL_A_RINGS_CQE_SIZE] = NLA_POLICY_MIN(NLA_U32, 1), [ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_RINGS_RX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), @@ -145,6 +147,14 @@ ethnl_set_rings_validate(struct ethnl_req_info *req_info, return -EOPNOTSUPP; } + if (tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT] && + !(ops->supported_ring_params & ETHTOOL_RING_USE_TCP_DATA_SPLIT)) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT], + "setting TCP data split is not supported"); + return -EOPNOTSUPP; + } + if (tb[ETHTOOL_A_RINGS_CQE_SIZE] && !(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) { NL_SET_ERR_MSG_ATTR(info->extack, @@ -202,6 +212,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) ethnl_update_u32(&ringparam.tx_pending, tb[ETHTOOL_A_RINGS_TX], &mod); ethnl_update_u32(&kernel_ringparam.rx_buf_len, tb[ETHTOOL_A_RINGS_RX_BUF_LEN], &mod); + ethnl_update_u8(&kernel_ringparam.tcp_data_split, + tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT], &mod); ethnl_update_u32(&kernel_ringparam.cqe_size, tb[ETHTOOL_A_RINGS_CQE_SIZE], &mod); ethnl_update_u8(&kernel_ringparam.tx_push, diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 5764202e6cb6..71679137eff2 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -13,6 +13,7 @@ struct rss_reply_data { u32 indir_size; u32 hkey_size; u32 hfunc; + u32 input_xfrm; u32 *indir_table; u8 *hkey; }; @@ -48,9 +49,9 @@ rss_prepare_data(const struct ethnl_req_info *req_base, struct rss_reply_data *data = RSS_REPDATA(reply_base); struct rss_req_info *request = RSS_REQINFO(req_base); struct net_device *dev = reply_base->dev; + struct ethtool_rxfh_param rxfh = {}; const struct ethtool_ops *ops; u32 total_size, indir_bytes; - u8 dev_hfunc = 0; u8 *rss_config; int ret; @@ -59,7 +60,7 @@ rss_prepare_data(const struct ethnl_req_info *req_base, return -EOPNOTSUPP; /* Some drivers don't handle rss_context */ - if (request->rss_context && !ops->get_rxfh_context) + if (request->rss_context && !ops->cap_rss_ctx_supported) return -EOPNOTSUPP; ret = ethnl_ops_begin(dev); @@ -83,21 +84,21 @@ rss_prepare_data(const struct ethnl_req_info *req_base, if (data->indir_size) data->indir_table = (u32 *)rss_config; - if (data->hkey_size) data->hkey = rss_config + indir_bytes; - if (request->rss_context) - ret = ops->get_rxfh_context(dev, data->indir_table, data->hkey, - &dev_hfunc, request->rss_context); - else - ret = ops->get_rxfh(dev, data->indir_table, data->hkey, - &dev_hfunc); + rxfh.indir_size = data->indir_size; + rxfh.indir = data->indir_table; + rxfh.key_size = data->hkey_size; + rxfh.key = data->hkey; + rxfh.rss_context = request->rss_context; + ret = ops->get_rxfh(dev, &rxfh); if (ret) goto out_ops; - data->hfunc = dev_hfunc; + data->hfunc = rxfh.hfunc; + data->input_xfrm = rxfh.input_xfrm; out_ops: ethnl_ops_complete(dev); return ret; @@ -111,6 +112,7 @@ rss_reply_size(const struct ethnl_req_info *req_base, int len; len = nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */ + nla_total_size(sizeof(u32)) + /* _RSS_INPUT_XFRM */ nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */ nla_total_size(data->hkey_size); /* _RSS_HKEY */ @@ -125,6 +127,8 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, if ((data->hfunc && nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) || + (data->input_xfrm && + nla_put_u32(skb, ETHTOOL_A_RSS_INPUT_XFRM, data->input_xfrm)) || (data->indir_size && nla_put(skb, ETHTOOL_A_RSS_INDIR, sizeof(u32) * data->indir_size, data->indir_table)) || diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 306f942c3b28..7ceb9ac6e730 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -173,7 +173,24 @@ static int hsr_dev_open(struct net_device *dev) static int hsr_dev_close(struct net_device *dev) { - /* Nothing to do here. */ + struct hsr_port *port; + struct hsr_priv *hsr; + + hsr = netdev_priv(dev); + hsr_for_each_port(hsr, port) { + if (port->type == HSR_PT_MASTER) + continue; + switch (port->type) { + case HSR_PT_SLAVE_A: + case HSR_PT_SLAVE_B: + dev_uc_unsync(port->dev, dev); + dev_mc_unsync(port->dev, dev); + break; + default: + break; + } + } + return 0; } @@ -404,12 +421,60 @@ void hsr_del_ports(struct hsr_priv *hsr) hsr_del_port(port); } +static void hsr_set_rx_mode(struct net_device *dev) +{ + struct hsr_port *port; + struct hsr_priv *hsr; + + hsr = netdev_priv(dev); + + hsr_for_each_port(hsr, port) { + if (port->type == HSR_PT_MASTER) + continue; + switch (port->type) { + case HSR_PT_SLAVE_A: + case HSR_PT_SLAVE_B: + dev_mc_sync_multiple(port->dev, dev); + dev_uc_sync_multiple(port->dev, dev); + break; + default: + break; + } + } +} + +static void hsr_change_rx_flags(struct net_device *dev, int change) +{ + struct hsr_port *port; + struct hsr_priv *hsr; + + hsr = netdev_priv(dev); + + hsr_for_each_port(hsr, port) { + if (port->type == HSR_PT_MASTER) + continue; + switch (port->type) { + case HSR_PT_SLAVE_A: + case HSR_PT_SLAVE_B: + if (change & IFF_ALLMULTI) + dev_set_allmulti(port->dev, + dev->flags & + IFF_ALLMULTI ? 1 : -1); + break; + default: + break; + } + } +} + static const struct net_device_ops hsr_device_ops = { .ndo_change_mtu = hsr_dev_change_mtu, .ndo_open = hsr_dev_open, .ndo_stop = hsr_dev_close, .ndo_start_xmit = hsr_dev_xmit, + .ndo_change_rx_flags = hsr_change_rx_flags, .ndo_fix_features = hsr_fix_features, + .ndo_set_rx_mode = hsr_set_rx_mode, }; static struct device_type hsr_type = { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fb81de10d332..835f4f9d98d2 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1633,6 +1633,7 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) #endif return -EINVAL; } +EXPORT_SYMBOL(inet_recv_error); int inet_gro_complete(struct sk_buff *skb, int nhoff) { @@ -1847,9 +1848,7 @@ static __net_init int inet_init_net(struct net *net) /* * Set defaults for local port range */ - seqlock_init(&net->ipv4.ip_local_ports.lock); - net->ipv4.ip_local_ports.range[0] = 32768; - net->ipv4.ip_local_ports.range[1] = 60999; + net->ipv4.ip_local_ports.range = 60999u << 16 | 32768u; seqlock_init(&net->ipv4.ping_group_range.lock); /* diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 39dcccf0f174..ae8b15e6896f 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -271,6 +271,74 @@ static int bpf_tcp_ca_validate(void *kdata) return tcp_validate_congestion_control(kdata); } +static u32 bpf_tcp_ca_ssthresh(struct sock *sk) +{ + return 0; +} + +static void bpf_tcp_ca_cong_avoid(struct sock *sk, u32 ack, u32 acked) +{ +} + +static void bpf_tcp_ca_set_state(struct sock *sk, u8 new_state) +{ +} + +static void bpf_tcp_ca_cwnd_event(struct sock *sk, enum tcp_ca_event ev) +{ +} + +static void bpf_tcp_ca_in_ack_event(struct sock *sk, u32 flags) +{ +} + +static void bpf_tcp_ca_pkts_acked(struct sock *sk, const struct ack_sample *sample) +{ +} + +static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk) +{ + return 0; +} + +static void bpf_tcp_ca_cong_control(struct sock *sk, const struct rate_sample *rs) +{ +} + +static u32 bpf_tcp_ca_undo_cwnd(struct sock *sk) +{ + return 0; +} + +static u32 bpf_tcp_ca_sndbuf_expand(struct sock *sk) +{ + return 0; +} + +static void __bpf_tcp_ca_init(struct sock *sk) +{ +} + +static void __bpf_tcp_ca_release(struct sock *sk) +{ +} + +static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = { + .ssthresh = bpf_tcp_ca_ssthresh, + .cong_avoid = bpf_tcp_ca_cong_avoid, + .set_state = bpf_tcp_ca_set_state, + .cwnd_event = bpf_tcp_ca_cwnd_event, + .in_ack_event = bpf_tcp_ca_in_ack_event, + .pkts_acked = bpf_tcp_ca_pkts_acked, + .min_tso_segs = bpf_tcp_ca_min_tso_segs, + .cong_control = bpf_tcp_ca_cong_control, + .undo_cwnd = bpf_tcp_ca_undo_cwnd, + .sndbuf_expand = bpf_tcp_ca_sndbuf_expand, + + .init = __bpf_tcp_ca_init, + .release = __bpf_tcp_ca_release, +}; + struct bpf_struct_ops bpf_tcp_congestion_ops = { .verifier_ops = &bpf_tcp_ca_verifier_ops, .reg = bpf_tcp_ca_reg, @@ -281,6 +349,7 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = { .init = bpf_tcp_ca_init, .validate = bpf_tcp_ca_validate, .name = "tcp_congestion_ops", + .cfi_stubs = &__bpf_ops_tcp_congestion_ops, }; static int __init bpf_tcp_ca_kfunc_init(void) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 394a498c2823..bd325b029dd1 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -117,37 +117,39 @@ bool inet_rcv_saddr_any(const struct sock *sk) return !sk->sk_rcv_saddr; } -void inet_get_local_port_range(const struct net *net, int *low, int *high) -{ - unsigned int seq; - - do { - seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); - - *low = net->ipv4.ip_local_ports.range[0]; - *high = net->ipv4.ip_local_ports.range[1]; - } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); -} -EXPORT_SYMBOL(inet_get_local_port_range); - -void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high) +/** + * inet_sk_get_local_port_range - fetch ephemeral ports range + * @sk: socket + * @low: pointer to low port + * @high: pointer to high port + * + * Fetch netns port range (/proc/sys/net/ipv4/ip_local_port_range) + * Range can be overridden if socket got IP_LOCAL_PORT_RANGE option. + * Returns true if IP_LOCAL_PORT_RANGE was set on this socket. + */ +bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high) { - const struct inet_sock *inet = inet_sk(sk); - const struct net *net = sock_net(sk); int lo, hi, sk_lo, sk_hi; + bool local_range = false; + u32 sk_range; - inet_get_local_port_range(net, &lo, &hi); + inet_get_local_port_range(sock_net(sk), &lo, &hi); - sk_lo = inet->local_port_range.lo; - sk_hi = inet->local_port_range.hi; + sk_range = READ_ONCE(inet_sk(sk)->local_port_range); + if (unlikely(sk_range)) { + sk_lo = sk_range & 0xffff; + sk_hi = sk_range >> 16; - if (unlikely(lo <= sk_lo && sk_lo <= hi)) - lo = sk_lo; - if (unlikely(lo <= sk_hi && sk_hi <= hi)) - hi = sk_hi; + if (lo <= sk_lo && sk_lo <= hi) + lo = sk_lo; + if (lo <= sk_hi && sk_hi <= hi) + hi = sk_hi; + local_range = true; + } *low = lo; *high = hi; + return local_range; } EXPORT_SYMBOL(inet_sk_get_local_port_range); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 7d0e7aaa71e0..46b13962ad02 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -1077,10 +1077,94 @@ skip_listen_ht: s_i = num = s_num = 0; } +/* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets + * with bh disabled. + */ +#define SKARR_SZ 16 + + /* Dump bound but inactive (not listening, connecting, etc.) sockets */ + if (cb->args[0] == 1) { + if (!(idiag_states & TCPF_BOUND_INACTIVE)) + goto skip_bind_ht; + + for (i = s_i; i < hashinfo->bhash_size; i++) { + struct inet_bind_hashbucket *ibb; + struct inet_bind2_bucket *tb2; + struct sock *sk_arr[SKARR_SZ]; + int num_arr[SKARR_SZ]; + int idx, accum, res; + +resume_bind_walk: + num = 0; + accum = 0; + ibb = &hashinfo->bhash2[i]; + + spin_lock_bh(&ibb->lock); + inet_bind_bucket_for_each(tb2, &ibb->chain) { + if (!net_eq(ib2_net(tb2), net)) + continue; + + sk_for_each_bound_bhash2(sk, &tb2->owners) { + struct inet_sock *inet = inet_sk(sk); + + if (num < s_num) + goto next_bind; + + if (sk->sk_state != TCP_CLOSE || + !inet->inet_num) + goto next_bind; + + if (r->sdiag_family != AF_UNSPEC && + r->sdiag_family != sk->sk_family) + goto next_bind; + + if (!inet_diag_bc_sk(bc, sk)) + goto next_bind; + + sock_hold(sk); + num_arr[accum] = num; + sk_arr[accum] = sk; + if (++accum == SKARR_SZ) + goto pause_bind_walk; +next_bind: + num++; + } + } +pause_bind_walk: + spin_unlock_bh(&ibb->lock); + + res = 0; + for (idx = 0; idx < accum; idx++) { + if (res >= 0) { + res = inet_sk_diag_fill(sk_arr[idx], + NULL, skb, cb, + r, NLM_F_MULTI, + net_admin); + if (res < 0) + num = num_arr[idx]; + } + sock_put(sk_arr[idx]); + } + if (res < 0) + goto done; + + cond_resched(); + + if (accum == SKARR_SZ) { + s_num = num + 1; + goto resume_bind_walk; + } + + s_num = 0; + } +skip_bind_ht: + cb->args[0] = 2; + s_i = num = s_num = 0; + } + if (!(idiag_states & ~TCPF_LISTEN)) goto out; -#define SKARR_SZ 16 for (i = s_i; i <= hashinfo->ehash_mask; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; spinlock_t *lock = inet_ehash_lockp(hashinfo, i); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index a532f749e477..9ff201bc4e6d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -1012,7 +1012,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, bool tb_created = false; u32 remaining, offset; int ret, i, low, high; - int l3mdev; + bool local_ports; + int step, l3mdev; u32 index; if (port) { @@ -1024,10 +1025,12 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, l3mdev = inet_sk_bound_l3mdev(sk); - inet_sk_get_local_port_range(sk, &low, &high); + local_ports = inet_sk_get_local_port_range(sk, &low, &high); + step = local_ports ? 1 : 2; + high++; /* [32768, 60999] -> [32768, 61000[ */ remaining = high - low; - if (likely(remaining > 1)) + if (!local_ports && remaining > 1) remaining &= ~1U; get_random_sleepable_once(table_perturb, @@ -1040,10 +1043,11 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, /* In first pass we try ports of @low parity. * inet_csk_get_port() does the opposite choice. */ - offset &= ~1U; + if (!local_ports) + offset &= ~1U; other_parity_scan: port = low + offset; - for (i = 0; i < remaining; i += 2, port += 2) { + for (i = 0; i < remaining; i += step, port += step) { if (unlikely(port >= high)) port -= remaining; if (inet_is_local_reserved_port(net, port)) @@ -1083,10 +1087,11 @@ next_port: cond_resched(); } - offset++; - if ((offset & 1) && remaining > 1) - goto other_parity_scan; - + if (!local_ports) { + offset++; + if ((offset & 1) && remaining > 1) + goto other_parity_scan; + } return -EADDRNOTAVAIL; ok: @@ -1109,8 +1114,8 @@ ok: * on low contention the randomness is maximal and on high contention * it may be inexistent. */ - i = max_t(int, i, get_random_u32_below(8) * 2); - WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2); + i = max_t(int, i, get_random_u32_below(8) * step); + WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + step); /* Head lock still held and bh's disabled */ inet_bind_hash(sk, tb, tb2, port); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 2efc53526a38..66247e8b429e 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -775,7 +775,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) if (optlen < GROUP_FILTER_SIZE(0)) return -EINVAL; - if (optlen > READ_ONCE(sysctl_optmem_max)) + if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max)) return -ENOBUFS; gsf = memdup_sockptr(optval, optlen); @@ -811,7 +811,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (optlen < size0) return -EINVAL; - if (optlen > READ_ONCE(sysctl_optmem_max) - 4) + if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max) - 4) return -ENOBUFS; p = kmalloc(optlen + 4, GFP_KERNEL); @@ -1055,6 +1055,19 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, case IP_TOS: /* This sets both TOS and Precedence */ ip_sock_set_tos(sk, val); return 0; + case IP_LOCAL_PORT_RANGE: + { + u16 lo = val; + u16 hi = val >> 16; + + if (optlen != sizeof(u32)) + return -EINVAL; + if (lo != 0 && hi != 0 && lo > hi) + return -EINVAL; + + WRITE_ONCE(inet->local_port_range, val); + return 0; + } } err = 0; @@ -1241,7 +1254,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, if (optlen < IP_MSFILTER_SIZE(0)) goto e_inval; - if (optlen > READ_ONCE(sysctl_optmem_max)) { + if (optlen > READ_ONCE(net->core.sysctl_optmem_max)) { err = -ENOBUFS; break; } @@ -1332,20 +1345,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, err = xfrm_user_policy(sk, optname, optval, optlen); break; - case IP_LOCAL_PORT_RANGE: - { - const __u16 lo = val; - const __u16 hi = val >> 16; - - if (optlen != sizeof(__u32)) - goto e_inval; - if (lo != 0 && hi != 0 && lo > hi) - goto e_inval; - - inet->local_port_range.lo = lo; - inet->local_port_range.hi = hi; - break; - } default: err = -ENOPROTOOPT; break; @@ -1692,6 +1691,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; return 0; } + case IP_LOCAL_PORT_RANGE: + val = READ_ONCE(inet->local_port_range); + goto copyval; } if (needs_rtnl) @@ -1721,9 +1723,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, else err = ip_get_mcast_msfilter(sk, optval, optlen, len); goto out; - case IP_LOCAL_PORT_RANGE: - val = inet->local_port_range.hi << 16 | inet->local_port_range.lo; - break; case IP_PROTOCOL: val = inet_sk(sk)->inet_num; break; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9e222a57bc2b..0063a237253b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1025,6 +1025,10 @@ static int ipmr_cache_report(const struct mr_table *mrt, struct sk_buff *skb; int ret; + mroute_sk = rcu_dereference(mrt->mroute_sk); + if (!mroute_sk) + return -EINVAL; + if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); else @@ -1069,7 +1073,8 @@ static int ipmr_cache_report(const struct mr_table *mrt, msg = (struct igmpmsg *)skb_network_header(skb); msg->im_vif = vifi; msg->im_vif_hi = vifi >> 8; - skb_dst_set(skb, dst_clone(skb_dst(pkt))); + ipv4_pktinfo_prepare(mroute_sk, pkt); + memcpy(skb->cb, pkt->cb, sizeof(skb->cb)); /* Add our header */ igmp = skb_put(skb, sizeof(struct igmphdr)); igmp->type = assert; @@ -1079,12 +1084,6 @@ static int ipmr_cache_report(const struct mr_table *mrt, skb->transport_header = skb->network_header; } - mroute_sk = rcu_dereference(mrt->mroute_sk); - if (!mroute_sk) { - kfree_skb(skb); - return -EINVAL; - } - igmpmsg_netlink_event(mrt, skb); /* Deliver to mrouted */ diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index d37282c06e3d..61f1c96cfe63 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -189,12 +189,14 @@ __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mssp) * Check if a ack sequence number is a valid syncookie. * Return the decoded mss if it is, or 0 if not. */ -int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, - u32 cookie) +int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th) { + __u32 cookie = ntohl(th->ack_seq) - 1; __u32 seq = ntohl(th->seq) - 1; - __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr, - th->source, th->dest, seq); + __u32 mssind; + + mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr, + th->source, th->dest, seq); return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } @@ -202,7 +204,7 @@ EXPORT_SYMBOL_GPL(__cookie_v4_check); struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst, u32 tsoff) + struct dst_entry *dst) { struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; @@ -212,7 +214,6 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, NULL, &own_req); if (child) { refcount_set(&req->rsk_refcnt, 1); - tcp_sk(child)->tsoffset = tsoff; sock_rps_save_rxhash(child, skb); if (rsk_drop_req(req)) { @@ -269,26 +270,46 @@ bool cookie_timestamp_decode(const struct net *net, } EXPORT_SYMBOL(cookie_timestamp_decode); -bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, - const struct net *net, const struct dst_entry *dst) +static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, + struct request_sock *req) { - bool ecn_ok = tcp_opt->rcv_tsecr & TS_OPT_ECN; + struct inet_request_sock *ireq = inet_rsk(req); + struct tcp_request_sock *treq = tcp_rsk(req); + const struct tcphdr *th = tcp_hdr(skb); - if (!ecn_ok) - return false; + req->num_retrans = 0; - if (READ_ONCE(net->ipv4.sysctl_tcp_ecn)) - return true; + ireq->ir_num = ntohs(th->dest); + ireq->ir_rmt_port = th->source; + ireq->ir_iif = inet_request_bound_dev_if(sk, skb); + ireq->ir_mark = inet_request_mark(sk, skb); + + if (IS_ENABLED(CONFIG_SMC)) + ireq->smc_ok = 0; - return dst_feature(dst, RTAX_FEATURE_ECN); + treq->snt_synack = 0; + treq->tfo_listener = false; + treq->txhash = net_tx_rndhash(); + treq->rcv_isn = ntohl(th->seq) - 1; + treq->snt_isn = ntohl(th->ack_seq) - 1; + treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; + treq->req_usec_ts = false; + +#if IS_ENABLED(CONFIG_MPTCP) + treq->is_mptcp = sk_is_mptcp(sk); + if (treq->is_mptcp) + return mptcp_subflow_init_cookie_req(req, sk, skb); +#endif + + return 0; } -EXPORT_SYMBOL(cookie_ecn_ok); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, - const struct tcp_request_sock_ops *af_ops, - struct sock *sk, - struct sk_buff *skb) + struct sock *sk, struct sk_buff *skb, + struct tcp_options_received *tcp_opt, + int mss, u32 tsoff) { + struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct request_sock *req; @@ -300,126 +321,109 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, if (!req) return NULL; - treq = tcp_rsk(req); + if (cookie_tcp_reqsk_init(sk, skb, req)) { + reqsk_free(req); + return NULL; + } - /* treq->af_specific might be used to perform TCP_MD5 lookup */ - treq->af_specific = af_ops; + ireq = inet_rsk(req); + treq = tcp_rsk(req); - treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; - treq->req_usec_ts = false; + req->mss = mss; + req->ts_recent = tcp_opt->saw_tstamp ? tcp_opt->rcv_tsval : 0; -#if IS_ENABLED(CONFIG_MPTCP) - treq->is_mptcp = sk_is_mptcp(sk); - if (treq->is_mptcp) { - int err = mptcp_subflow_init_cookie_req(req, sk, skb); + ireq->snd_wscale = tcp_opt->snd_wscale; + ireq->tstamp_ok = tcp_opt->saw_tstamp; + ireq->sack_ok = tcp_opt->sack_ok; + ireq->wscale_ok = tcp_opt->wscale_ok; + ireq->ecn_ok = !!(tcp_opt->rcv_tsecr & TS_OPT_ECN); - if (err) { - reqsk_free(req); - return NULL; - } - } -#endif + treq->ts_off = tsoff; return req; } EXPORT_SYMBOL_GPL(cookie_tcp_reqsk_alloc); -/* On input, sk is a listener. - * Output is listener if incoming packet would not create a child - * NULL if memory could not be allocated. - */ -struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) +static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, + struct sk_buff *skb) { - struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; struct tcp_options_received tcp_opt; - struct inet_request_sock *ireq; - struct tcp_request_sock *treq; - struct tcp_sock *tp = tcp_sk(sk); - const struct tcphdr *th = tcp_hdr(skb); - __u32 cookie = ntohl(th->ack_seq) - 1; - struct sock *ret = sk; - struct request_sock *req; - int full_space, mss; - struct rtable *rt; - __u8 rcv_wscale; - struct flowi4 fl4; u32 tsoff = 0; - int l3index; - - if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || - !th->ack || th->rst) - goto out; + int mss; if (tcp_synq_no_recent_overflow(sk)) goto out; - mss = __cookie_v4_check(ip_hdr(skb), th, cookie); - if (mss == 0) { - __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); + mss = __cookie_v4_check(ip_hdr(skb), tcp_hdr(skb)); + if (!mss) { + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); goto out; } - __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV); /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL); + tcp_parse_options(net, skb, &tcp_opt, 0, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { - tsoff = secure_tcp_ts_off(sock_net(sk), + tsoff = secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); tcp_opt.rcv_tsecr -= tsoff; } - if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt)) + if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; - ret = NULL; - req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, - &tcp_request_sock_ipv4_ops, sk, skb); - if (!req) + return cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb, + &tcp_opt, mss, tsoff); +out: + return ERR_PTR(-EINVAL); +} + +/* On input, sk is a listener. + * Output is listener if incoming packet would not create a child + * NULL if memory could not be allocated. + */ +struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) +{ + struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; + const struct tcphdr *th = tcp_hdr(skb); + struct tcp_sock *tp = tcp_sk(sk); + struct inet_request_sock *ireq; + struct net *net = sock_net(sk); + struct request_sock *req; + struct sock *ret = sk; + struct flowi4 fl4; + struct rtable *rt; + __u8 rcv_wscale; + int full_space; + + if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) goto out; + req = cookie_tcp_check(net, sk, skb); + if (IS_ERR(req)) + goto out; + if (!req) + goto out_drop; + ireq = inet_rsk(req); - treq = tcp_rsk(req); - treq->rcv_isn = ntohl(th->seq) - 1; - treq->snt_isn = cookie; - treq->ts_off = 0; - treq->txhash = net_tx_rndhash(); - req->mss = mss; - ireq->ir_num = ntohs(th->dest); - ireq->ir_rmt_port = th->source; + sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); - ireq->ir_mark = inet_request_mark(sk, skb); - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - treq->snt_synack = 0; - treq->tfo_listener = false; - - if (IS_ENABLED(CONFIG_SMC)) - ireq->smc_ok = 0; - - ireq->ir_iif = inet_request_bound_dev_if(sk, skb); - - l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); - tcp_ao_syncookie(sk, skb, treq, AF_INET, l3index); /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) */ - RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(sock_net(sk), skb)); + RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb)); - if (security_inet_conn_request(sk, skb, req)) { - reqsk_free(req); - goto out; - } + if (security_inet_conn_request(sk, skb, req)) + goto out_free; - req->num_retrans = 0; + tcp_ao_syncookie(sk, skb, req, AF_INET); /* * We need to lookup the route here to get at the correct @@ -433,11 +437,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) opt->srr ? opt->faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi_common(&fl4)); - rt = ip_route_output_key(sock_net(sk), &fl4); - if (IS_ERR(rt)) { - reqsk_free(req); - goto out; - } + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) + goto out_free; /* Try to redo what tcp_v4_send_synack did. */ req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); @@ -453,13 +455,18 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) dst_metric(&rt->dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; - ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst); + ireq->ecn_ok &= cookie_ecn_ok(net, &rt->dst); - ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst, tsoff); + ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst); /* ip_queue_xmit() depends on our flow being setup * Normal sockets get it right from inet_csk_route_child_sock() */ if (ret) inet_sk(ret)->cork.fl.u.ip4 = fl4; -out: return ret; +out: + return ret; +out_free: + reqsk_free(req); +out_drop: + return NULL; } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index f63a545a7374..7e4f16a7dcc1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -50,26 +50,22 @@ static int tcp_plb_max_cong_thresh = 256; static int sysctl_tcp_low_latency __read_mostly; /* Update system visible IP port range */ -static void set_local_port_range(struct net *net, int range[2]) +static void set_local_port_range(struct net *net, unsigned int low, unsigned int high) { - bool same_parity = !((range[0] ^ range[1]) & 1); + bool same_parity = !((low ^ high) & 1); - write_seqlock_bh(&net->ipv4.ip_local_ports.lock); if (same_parity && !net->ipv4.ip_local_ports.warned) { net->ipv4.ip_local_ports.warned = true; pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n"); } - net->ipv4.ip_local_ports.range[0] = range[0]; - net->ipv4.ip_local_ports.range[1] = range[1]; - write_sequnlock_bh(&net->ipv4.ip_local_ports.lock); + WRITE_ONCE(net->ipv4.ip_local_ports.range, high << 16 | low); } /* Validate changes from /proc interface. */ static int ipv4_local_port_range(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - struct net *net = - container_of(table->data, struct net, ipv4.ip_local_ports.range); + struct net *net = table->data; int ret; int range[2]; struct ctl_table tmp = { @@ -93,7 +89,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, (range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock))) ret = -EINVAL; else - set_local_port_range(net, range); + set_local_port_range(net, range[0], range[1]); } return ret; @@ -733,8 +729,8 @@ static struct ctl_table ipv4_net_table[] = { }, { .procname = "ip_local_port_range", - .maxlen = sizeof(init_net.ipv4.ip_local_ports.range), - .data = &init_net.ipv4.ip_local_ports.range, + .maxlen = 0, + .data = &init_net, .mode = 0644, .proc_handler = ipv4_local_port_range, }, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ff6838ca2e58..1d6b80145efb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2605,6 +2605,7 @@ void tcp_set_state(struct sock *sk, int state) BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN); BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING); BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV); + BUILD_BUG_ON((int)BPF_TCP_BOUND_INACTIVE != (int)TCP_BOUND_INACTIVE); BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES); /* bpf uapi header bpf.h defines an anonymous enum with values @@ -4585,6 +4586,97 @@ static void __init tcp_init_mem(void) sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; /* 9.37 % */ } +static void __init tcp_struct_check(void) +{ + /* TX read-mostly hotpath cache lines */ + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, max_window); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, rcv_ssthresh); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, reordering); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, notsent_lowat); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, gso_segs); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, lost_skb_hint); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, retransmit_skb_hint); + CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 40); + + /* TXRX read-mostly hotpath cache lines */ + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, tsoffset); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, snd_wnd); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, mss_cache); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, snd_cwnd); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, prr_out); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out); + CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 31); + + /* RX read-mostly hotpath cache lines */ + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rcv_tstamp); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_wl1); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tlp_high_seq); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rttvar_us); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, retrans_out); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, advmss); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, urg_data); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, lost); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rtt_min); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, out_of_order_queue); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_ssthresh); + CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 69); + + /* TX read-write hotpath cache lines */ + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, segs_out); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, data_segs_out); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, bytes_sent); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, snd_sml); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, chrono_start); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, chrono_stat); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, write_seq); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, pushed_seq); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, lsndtime); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, mdev_us); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_wstamp_ns); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_clock_cache); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_mstamp); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, rtt_seq); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags); + CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 113); + + /* TXRX read-write hotpath cache lines */ + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_nxt); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_nxt); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_una); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, window_clamp); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, srtt_us); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, packets_out); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_up); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered_ce); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt); + CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 76); + + /* RX read-write hotpath cache lines */ + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, segs_in); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, data_segs_in); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_wup); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, max_packets_out); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, cwnd_usage_seq); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_delivered); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_interval_us); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_last_tsecr); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, first_tx_mstamp); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, delivered_mstamp); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_acked); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_est); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcvq_space); + CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_rx, 99); +} + void __init tcp_init(void) { int max_rshare, max_wshare, cnt; @@ -4595,6 +4687,8 @@ void __init tcp_init(void) BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof_field(struct sk_buff, cb)); + tcp_struct_check(); + percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE); diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index f8308d3f565e..87db432c6bb4 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -844,18 +844,30 @@ static struct tcp_ao_key *tcp_ao_inbound_lookup(unsigned short int family, } void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, - struct tcp_request_sock *treq, - unsigned short int family, int l3index) + struct request_sock *req, unsigned short int family) { + struct tcp_request_sock *treq = tcp_rsk(req); const struct tcphdr *th = tcp_hdr(skb); const struct tcp_ao_hdr *aoh; struct tcp_ao_key *key; + int l3index; + + /* treq->af_specific is used to perform TCP_AO lookup + * in tcp_create_openreq_child(). + */ +#if IS_ENABLED(CONFIG_IPV6) + if (family == AF_INET6) + treq->af_specific = &tcp_request_sock_ipv6_ops; + else +#endif + treq->af_specific = &tcp_request_sock_ipv4_ops; treq->used_tcp_ao = false; if (tcp_parse_auth_options(th, NULL, &aoh) || !aoh) return; + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), inet_rsk(req)->ir_iif); key = tcp_ao_inbound_lookup(family, sk, skb, -1, aoh->keyid, l3index); if (!key) /* Key not found, continue without TCP-AO */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 701cb87043f2..df7b13f0e5e0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -202,23 +202,17 @@ static void bpf_skops_established(struct sock *sk, int bpf_op, } #endif -static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb, - unsigned int len) +static __cold void tcp_gro_dev_warn(const struct sock *sk, const struct sk_buff *skb, + unsigned int len) { - static bool __once __read_mostly; + struct net_device *dev; - if (!__once) { - struct net_device *dev; - - __once = true; - - rcu_read_lock(); - dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); - if (!dev || len >= dev->mtu) - pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n", - dev ? dev->name : "Unknown driver"); - rcu_read_unlock(); - } + rcu_read_lock(); + dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); + if (!dev || len >= READ_ONCE(dev->mtu)) + pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n", + dev ? dev->name : "Unknown driver"); + rcu_read_unlock(); } /* Adapt the MSS value used to make delayed ack decision to the @@ -250,9 +244,8 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); /* Account for possibly-removed options */ - if (unlikely(len > icsk->icsk_ack.rcv_mss + - MAX_TCP_OPTION_SPACE)) - tcp_gro_dev_warn(sk, skb, len); + DO_ONCE_LITE_IF(len > icsk->icsk_ack.rcv_mss + MAX_TCP_OPTION_SPACE, + tcp_gro_dev_warn, sk, skb, len); /* If the skb has a len of exactly 1*MSS and has the PSH bit * set then it is likely the end of an application write. So * more data may not be arriving soon, and yet the data sender diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0c50c5a32b84..4bac6e319aca 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -482,6 +482,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) const int code = icmp_hdr(skb)->code; struct sock *sk; struct request_sock *fastopen; + bool harderr = false; u32 seq, snd_una; int err; struct net *net = dev_net(skb->dev); @@ -555,6 +556,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) goto out; case ICMP_PARAMETERPROB: err = EPROTO; + harderr = true; break; case ICMP_DEST_UNREACH: if (code > NR_ICMP_UNREACH) @@ -579,6 +581,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) } err = icmp_err_convert[code].errno; + harderr = icmp_err_convert[code].fatal; /* check if this ICMP message allows revert of backoff. * (see RFC 6069) */ @@ -604,6 +607,9 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th); + if (!harderr) + break; + if (!sock_owned_by_user(sk)) { WRITE_ONCE(sk->sk_err, err); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 1f9f6c1c196b..d1ad20ce1c8c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -626,7 +626,6 @@ void tcp_retransmit_timer(struct sock *sk) * implemented ftp to mars will work nicely. We will have to fix * the 120 second clamps though! */ - icsk->icsk_backoff++; out_reset_timer: /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is @@ -647,11 +646,12 @@ out_reset_timer: tcp_rto_min(sk), TCP_RTO_MAX); } else if (sk->sk_state != TCP_SYN_SENT || - icsk->icsk_backoff > + tp->total_rto > READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) { /* Use normal (exponential) backoff unless linear timeouts are * activated. */ + icsk->icsk_backoff++; icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index cc6a502db39d..fff78496803d 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -60,9 +60,9 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, if (!oif) { if (ipv6_addr_is_multicast(&fl6->daddr)) - oif = np->mcast_oif; + oif = READ_ONCE(np->mcast_oif); else - oif = np->ucast_oif; + oif = READ_ONCE(np->ucast_oif); } fl6->flowi6_oif = oif; @@ -229,7 +229,7 @@ ipv4_connected: } if (!sk->sk_bound_dev_if && (addr_type & IPV6_ADDR_MULTICAST)) - WRITE_ONCE(sk->sk_bound_dev_if, np->mcast_oif); + WRITE_ONCE(sk->sk_bound_dev_if, READ_ONCE(np->mcast_oif)); /* Connect to link-local address requires an interface */ if (!sk->sk_bound_dev_if) { diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index f62427097126..1635da07285f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -584,9 +584,9 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, tmp_hdr.icmp6_pointer = htonl(info); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) - fl6.flowi6_oif = np->mcast_oif; + fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->ucast_oif; + fl6.flowi6_oif = READ_ONCE(np->ucast_oif); ipcm6_init_sk(&ipc6, sk); ipc6.sockc.mark = mark; @@ -770,9 +770,9 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) np = inet6_sk(sk); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) - fl6.flowi6_oif = np->mcast_oif; + fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->ucast_oif; + fl6.flowi6_oif = READ_ONCE(np->ucast_oif); if (ip6_dst_lookup(net, sk, &dst, &fl6)) goto out; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 7d661735cb9d..56c3c467f9de 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -210,7 +210,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (optlen < GROUP_FILTER_SIZE(0)) return -EINVAL; - if (optlen > READ_ONCE(sysctl_optmem_max)) + if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max)) return -ENOBUFS; gsf = memdup_sockptr(optval, optlen); @@ -244,7 +244,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (optlen < size0) return -EINVAL; - if (optlen > READ_ONCE(sysctl_optmem_max) - 4) + if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max) - 4) return -ENOBUFS; p = kmalloc(optlen + 4, GFP_KERNEL); @@ -509,6 +509,59 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optlen < sizeof(int)) return -EINVAL; return ip6_sock_set_addr_preferences(sk, val); + case IPV6_MULTICAST_IF: + if (sk->sk_type == SOCK_STREAM) + return -ENOPROTOOPT; + if (optlen < sizeof(int)) + return -EINVAL; + if (val) { + struct net_device *dev; + int bound_dev_if, midx; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, val); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + midx = l3mdev_master_ifindex_rcu(dev); + + rcu_read_unlock(); + + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + if (bound_dev_if && + bound_dev_if != val && + (!midx || midx != bound_dev_if)) + return -EINVAL; + } + WRITE_ONCE(np->mcast_oif, val); + return 0; + case IPV6_UNICAST_IF: + { + struct net_device *dev; + int ifindex; + + if (optlen != sizeof(int)) + return -EINVAL; + + ifindex = (__force int)ntohl((__force __be32)val); + if (!ifindex) { + WRITE_ONCE(np->ucast_oif, 0); + return 0; + } + + dev = dev_get_by_index(net, ifindex); + if (!dev) + return -EADDRNOTAVAIL; + dev_put(dev); + + if (READ_ONCE(sk->sk_bound_dev_if)) + return -EINVAL; + + WRITE_ONCE(np->ucast_oif, ifindex); + return 0; + } } if (needs_rtnl) rtnl_lock(); @@ -829,67 +882,6 @@ done: break; } - - case IPV6_UNICAST_IF: - { - struct net_device *dev = NULL; - int ifindex; - - if (optlen != sizeof(int)) - goto e_inval; - - ifindex = (__force int)ntohl((__force __be32)val); - if (ifindex == 0) { - np->ucast_oif = 0; - retv = 0; - break; - } - - dev = dev_get_by_index(net, ifindex); - retv = -EADDRNOTAVAIL; - if (!dev) - break; - dev_put(dev); - - retv = -EINVAL; - if (sk->sk_bound_dev_if) - break; - - np->ucast_oif = ifindex; - retv = 0; - break; - } - - case IPV6_MULTICAST_IF: - if (sk->sk_type == SOCK_STREAM) - break; - if (optlen < sizeof(int)) - goto e_inval; - - if (val) { - struct net_device *dev; - int midx; - - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, val); - if (!dev) { - rcu_read_unlock(); - retv = -ENODEV; - break; - } - midx = l3mdev_master_ifindex_rcu(dev); - - rcu_read_unlock(); - - if (sk->sk_bound_dev_if && - sk->sk_bound_dev_if != val && - (!midx || midx != sk->sk_bound_dev_if)) - goto e_inval; - } - np->mcast_oif = val; - retv = 0; - break; case IPV6_ADD_MEMBERSHIP: case IPV6_DROP_MEMBERSHIP: { @@ -1161,10 +1153,12 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, sockopt_release_sock(sk); if (!skb) { if (np->rxopt.bits.rxinfo) { + int mcast_oif = READ_ONCE(np->mcast_oif); struct in6_pktinfo src_info; - src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : + + src_info.ipi6_ifindex = mcast_oif ? : np->sticky_pktinfo.ipi6_ifindex; - src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr; + src_info.ipi6_addr = mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { @@ -1178,11 +1172,13 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); } if (np->rxopt.bits.rxoinfo) { + int mcast_oif = READ_ONCE(np->mcast_oif); struct in6_pktinfo src_info; - src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : + + src_info.ipi6_ifindex = mcast_oif ? : np->sticky_pktinfo.ipi6_ifindex; - src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : - np->sticky_pktinfo.ipi6_addr; + src_info.ipi6_addr = mcast_oif ? sk->sk_v6_daddr : + np->sticky_pktinfo.ipi6_addr; put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { @@ -1359,7 +1355,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_MULTICAST_IF: - val = np->mcast_oif; + val = READ_ONCE(np->mcast_oif); break; case IPV6_MULTICAST_ALL: @@ -1367,7 +1363,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_UNICAST_IF: - val = (__force int)htonl((__u32) np->ucast_oif); + val = (__force int)htonl((__u32) READ_ONCE(np->ucast_oif)); break; case IPV6_MTU_DISCOVER: diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index d2098dd4ceae..ef2059c88955 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -107,9 +107,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) oif = np->sticky_pktinfo.ipi6_ifindex; if (!oif && ipv6_addr_is_multicast(daddr)) - oif = np->mcast_oif; + oif = READ_ONCE(np->mcast_oif); else if (!oif) - oif = np->ucast_oif; + oif = READ_ONCE(np->ucast_oif); addr_type = ipv6_addr_type(daddr); if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) || @@ -157,9 +157,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) rt = (struct rt6_info *) dst; if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) - fl6.flowi6_oif = np->mcast_oif; + fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->ucast_oif; + fl6.flowi6_oif = READ_ONCE(np->ucast_oif); pfh.icmph.type = user_icmph.icmp6_type; pfh.icmph.code = user_icmph.icmp6_code; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index dd0a4e73e602..03dbb874c363 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -876,9 +876,9 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) final_p = fl6_update_dst(&fl6, opt, &final); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) - fl6.flowi6_oif = np->mcast_oif; + fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->ucast_oif; + fl6.flowi6_oif = READ_ONCE(np->ucast_oif); security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); if (hdrincl) diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 12eedc6ca2cc..c8d2ca27220c 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -114,76 +114,82 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp) return __cookie_v6_init_sequence(iph, th, mssp); } -int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th, - __u32 cookie) +int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th) { + __u32 cookie = ntohl(th->ack_seq) - 1; __u32 seq = ntohl(th->seq) - 1; - __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr, - th->source, th->dest, seq); + __u32 mssind; + + mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr, + th->source, th->dest, seq); return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } EXPORT_SYMBOL_GPL(__cookie_v6_check); -struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) +static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, + struct sk_buff *skb) { struct tcp_options_received tcp_opt; - struct inet_request_sock *ireq; - struct tcp_request_sock *treq; - struct ipv6_pinfo *np = inet6_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); - const struct tcphdr *th = tcp_hdr(skb); - __u32 cookie = ntohl(th->ack_seq) - 1; - struct sock *ret = sk; - struct request_sock *req; - int full_space, mss; - struct dst_entry *dst; - __u8 rcv_wscale; u32 tsoff = 0; - int l3index; - - if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || - !th->ack || th->rst) - goto out; + int mss; if (tcp_synq_no_recent_overflow(sk)) goto out; - mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie); - if (mss == 0) { - __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); + mss = __cookie_v6_check(ipv6_hdr(skb), tcp_hdr(skb)); + if (!mss) { + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); goto out; } - __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV); /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL); + tcp_parse_options(net, skb, &tcp_opt, 0, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { - tsoff = secure_tcpv6_ts_off(sock_net(sk), + tsoff = secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32); tcp_opt.rcv_tsecr -= tsoff; } - if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt)) + if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; - ret = NULL; - req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, - &tcp_request_sock_ipv6_ops, sk, skb); - if (!req) + return cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb, + &tcp_opt, mss, tsoff); +out: + return ERR_PTR(-EINVAL); +} + +struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) +{ + const struct tcphdr *th = tcp_hdr(skb); + struct ipv6_pinfo *np = inet6_sk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct inet_request_sock *ireq; + struct net *net = sock_net(sk); + struct request_sock *req; + struct dst_entry *dst; + struct sock *ret = sk; + __u8 rcv_wscale; + int full_space; + + if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) goto out; + req = cookie_tcp_check(net, sk, skb); + if (IS_ERR(req)) + goto out; + if (!req) + goto out_drop; + ireq = inet_rsk(req); - treq = tcp_rsk(req); - treq->tfo_listener = false; - req->mss = mss; - ireq->ir_rmt_port = th->source; - ireq->ir_num = ntohs(th->dest); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; @@ -197,31 +203,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->pktopts = skb; } - ireq->ir_iif = inet_request_bound_dev_if(sk, skb); /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = tcp_v6_iif(skb); - ireq->ir_mark = inet_request_mark(sk, skb); - - req->num_retrans = 0; - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - treq->snt_synack = 0; - treq->rcv_isn = ntohl(th->seq) - 1; - treq->snt_isn = cookie; - treq->ts_off = 0; - treq->txhash = net_tx_rndhash(); - - l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); - tcp_ao_syncookie(sk, skb, treq, AF_INET6, l3index); - - if (IS_ENABLED(CONFIG_SMC)) - ireq->smc_ok = 0; + tcp_ao_syncookie(sk, skb, req, AF_INET6); /* * We need to lookup the dst_entry to get the correct window size. @@ -243,7 +230,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi_common(&fl6)); - dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); + dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p); if (IS_ERR(dst)) goto out_free; } @@ -261,12 +248,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) dst_metric(dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; - ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst); + ireq->ecn_ok &= cookie_ecn_ok(net, dst); - ret = tcp_get_cookie_sock(sk, skb, req, dst, tsoff); + ret = tcp_get_cookie_sock(sk, skb, req, dst); out: return ret; out_free: reqsk_free(req); +out_drop: return NULL; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8c6623496dd7..d1307d77a6f0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -381,7 +381,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct tcp_sock *tp; __u32 seq, snd_una; struct sock *sk; - bool fatal; + bool harderr; int err; sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, @@ -402,9 +402,9 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } seq = ntohl(th->seq); - fatal = icmpv6_err_convert(type, code, &err); + harderr = icmpv6_err_convert(type, code, &err); if (sk->sk_state == TCP_NEW_SYN_RECV) { - tcp_req_err(sk, seq, fatal); + tcp_req_err(sk, seq, harderr); return 0; } @@ -489,6 +489,9 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); + if (!harderr) + break; + if (!sock_owned_by_user(sk)) { WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ @@ -1699,7 +1702,7 @@ ipv6_pktoptions: if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) - np->mcast_oif = tcp_v6_iif(opt_skb); + WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb)); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 622b10a549f7..594e3f23c129 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1541,10 +1541,10 @@ do_udp_sendmsg: connected = false; if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) { - fl6->flowi6_oif = np->mcast_oif; + fl6->flowi6_oif = READ_ONCE(np->mcast_oif); connected = false; } else if (!fl6->flowi6_oif) - fl6->flowi6_oif = np->ucast_oif; + fl6->flowi6_oif = READ_ONCE(np->ucast_oif); security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index bb373e249237..dd3153966173 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -599,9 +599,9 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) final_p = fl6_update_dst(&fl6, opt, &final); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) - fl6.flowi6_oif = np->mcast_oif; + fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->ucast_oif; + fl6.flowi6_oif = READ_ONCE(np->ucast_oif); security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index c9eb52768133..4406b4f8f3b9 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -67,4 +67,6 @@ mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) obj-y += tests/ +mac80211-y += wbrf.o + ccflags-y += -DDEBUG diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index eb1d3ef84353..489dd97f5172 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1270,7 +1270,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, return -EALREADY; if (params->smps_mode != NL80211_SMPS_OFF) - return -ENOTSUPP; + return -EOPNOTSUPP; link->smps_mode = IEEE80211_SMPS_OFF; @@ -2556,7 +2556,7 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, * devices that report signal in dBm. */ if (!ieee80211_hw_check(&sdata->local->hw, SIGNAL_DBM)) - return -ENOTSUPP; + return -EOPNOTSUPP; conf->rssi_threshold = nconf->rssi_threshold; } if (_chg_mesh_attr(NL80211_MESHCONF_HT_OPMODE, mask)) { diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 1d928f29ad6f..ef4c2cebc080 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -507,11 +507,16 @@ static void _ieee80211_change_chanctx(struct ieee80211_local *local, WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef)); + ieee80211_remove_wbrf(local, &ctx->conf.def); + ctx->conf.def = *chandef; /* check if min chanctx also changed */ changed = IEEE80211_CHANCTX_CHANGE_WIDTH | _ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for); + + ieee80211_add_wbrf(local, &ctx->conf.def); + drv_change_chanctx(local, ctx, changed); if (!local->use_chanctx) { @@ -667,6 +672,8 @@ static int ieee80211_add_chanctx(struct ieee80211_local *local, lockdep_assert_wiphy(local->hw.wiphy); + ieee80211_add_wbrf(local, &ctx->conf.def); + if (!local->use_chanctx) local->hw.conf.radar_enabled = ctx->conf.radar_enabled; @@ -746,6 +753,8 @@ static void ieee80211_del_chanctx(struct ieee80211_local *local, } ieee80211_recalc_idle(local); + + ieee80211_remove_wbrf(local, &ctx->conf.def); } static void ieee80211_free_chanctx(struct ieee80211_local *local, @@ -858,7 +867,7 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link, int ret = 0; if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN)) - return -ENOTSUPP; + return -EOPNOTSUPP; conf = rcu_dereference_protected(link->conf->chanctx_conf, lockdep_is_held(&local->hw.wiphy->mtx)); @@ -1106,7 +1115,7 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, curr_ctx = ieee80211_link_get_chanctx(link); if (curr_ctx && local->use_chanctx && !local->ops->switch_vif_chanctx) - return -ENOTSUPP; + return -EOPNOTSUPP; new_ctx = ieee80211_find_reservation_chanctx(local, chandef, mode); if (!new_ctx) { diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index b575ae90e57f..74be49191e70 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -497,6 +497,7 @@ static const char *hw_flag_names[] = { FLAG(SUPPORTS_CONC_MON_RX_DECAP), FLAG(DETECTS_COLOR_COLLISION), FLAG(MLO_MCAST_MULTI_LINK_TX), + FLAG(DISALLOW_PUNCTURING), #undef FLAG }; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index f690c385a345..e9219f927875 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -805,7 +805,7 @@ drv_cancel_remain_on_channel(struct ieee80211_local *local, static inline int drv_set_ringparam(struct ieee80211_local *local, u32 tx, u32 rx) { - int ret = -ENOTSUPP; + int ret = -EOPNOTSUPP; might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 8b1e02f2f9ae..8f2b445a5ec3 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -485,7 +485,7 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, ifibss->ssid_len, IEEE80211_BSS_TYPE_IBSS, IEEE80211_PRIVACY(ifibss->privacy)); - if (WARN_ON(!cbss)) + if (unlikely(!cbss)) return -EINVAL; rcu_read_lock(); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 84df104f272b..29312f6638a1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -92,11 +92,14 @@ enum ieee80211_status_data { IEEE80211_STATUS_SUBDATA_MASK = 0xff0, }; -/* - * Keep a station's queues on the active list for deficit accounting purposes - * if it was active or queued during the last 100ms - */ -#define AIRTIME_ACTIVE_DURATION (HZ / 10) +static inline bool +ieee80211_sta_keep_active(struct sta_info *sta, u8 ac) +{ + /* Keep a station's queues on the active list for deficit accounting + * purposes if it was active or queued during the last 100ms. + */ + return time_before_eq(jiffies, sta->airtime[ac].last_active + HZ / 10); +} struct ieee80211_bss { u32 device_ts_beacon, device_ts_presp; @@ -1559,6 +1562,8 @@ struct ieee80211_local { /* extended capabilities provided by mac80211 */ u8 ext_capa[8]; + + bool wbrf_supported; }; static inline struct ieee80211_sub_if_data * @@ -2600,4 +2605,9 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata, const struct ieee80211_eht_cap_elem *eht_cap_ie_elem, u8 eht_cap_len, struct link_sta_info *link_sta); + +void ieee80211_check_wbrf_support(struct ieee80211_local *local); +void ieee80211_add_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef); +void ieee80211_remove_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef); + #endif /* IEEE80211_I_H */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 033a5261ac3a..f2ece7793573 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1405,6 +1405,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) debugfs_hw_add(local); rate_control_add_debugfs(local); + ieee80211_check_wbrf_support(local); + rtnl_lock(); wiphy_lock(hw->wiphy); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 775d52561c54..024f48db6b05 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -151,7 +151,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, break; default: kfree_skb(skb); - return -ENOTSUPP; + return -EOPNOTSUPP; } *pos++ = ie_len; *pos++ = flags; diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 8a3f44ce3e04..735edde1bd81 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -676,10 +676,10 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata, if (ether_addr_equal(dst, sdata->vif.addr)) /* never add ourselves as neighbours */ - return ERR_PTR(-ENOTSUPP); + return ERR_PTR(-EOPNOTSUPP); if (is_multicast_ether_addr(dst)) - return ERR_PTR(-ENOTSUPP); + return ERR_PTR(-EOPNOTSUPP); if (atomic_add_unless(&sdata->u.mesh.mpaths, 1, MESH_MAX_MPATHS) == 0) return ERR_PTR(-ENOSPC); @@ -719,10 +719,10 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata, if (ether_addr_equal(dst, sdata->vif.addr)) /* never add ourselves as neighbours */ - return -ENOTSUPP; + return -EOPNOTSUPP; if (is_multicast_ether_addr(dst)) - return -ENOTSUPP; + return -EOPNOTSUPP; new_mpath = mesh_path_new(sdata, dst, GFP_ATOMIC); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c8998cf01b7a..40a4fbfff530 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -135,6 +135,7 @@ ieee80211_handle_puncturing_bitmap(struct ieee80211_link_data *link, u16 bitmap, u64 *changed) { struct cfg80211_chan_def *chandef = &link->conf->chandef; + struct ieee80211_local *local = link->sdata->local; u16 extracted; u64 _changed = 0; @@ -147,7 +148,9 @@ ieee80211_handle_puncturing_bitmap(struct ieee80211_link_data *link, bitmap); if (cfg80211_valid_disable_subchannel_bitmap(&bitmap, - chandef)) + chandef) && + !(bitmap && ieee80211_hw_check(&local->hw, + DISALLOW_PUNCTURING))) break; link->u.mgd.conn_flags |= ieee80211_chandef_downgrade(chandef); @@ -1382,7 +1385,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) struct ieee80211_mgmt *mgmt; u8 *pos, qos_info, *ie_start; size_t offset, noffset; - u16 capab = WLAN_CAPABILITY_ESS, link_capab; + u16 capab = 0, link_capab; __le16 listen_int; struct element *ext_capa = NULL; enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif); @@ -1529,6 +1532,17 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) *pos++ = assoc_data->ssid_len; memcpy(pos, assoc_data->ssid, assoc_data->ssid_len); + /* + * This bit is technically reserved, so it shouldn't matter for either + * the AP or us, but it also means we shouldn't set it. However, we've + * always set it in the past, and apparently some EHT APs check that + * we don't set it. To avoid interoperability issues with old APs that + * for some reason check it and want it to be set, set the bit for all + * pre-EHT connections as we used to do. + */ + if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT) + capab |= WLAN_CAPABILITY_ESS; + /* add the elements for the assoc (main) link */ link_capab = capab; offset = ieee80211_assoc_link_elems(sdata, skb, &link_capab, @@ -5682,6 +5696,7 @@ static bool ieee80211_config_puncturing(struct ieee80211_link_data *link, const struct ieee80211_eht_operation *eht_oper, u64 *changed) { + struct ieee80211_local *local = link->sdata->local; u16 bitmap = 0, extracted; if ((eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) && @@ -5713,6 +5728,9 @@ static bool ieee80211_config_puncturing(struct ieee80211_link_data *link, return false; } + if (bitmap && ieee80211_hw_check(&local->hw, DISALLOW_PUNCTURING)) + return false; + ieee80211_handle_puncturing_bitmap(link, eht_oper, bitmap, changed); return true; } @@ -7586,7 +7604,8 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata, bitmap = get_unaligned_le16(disable_subchannel_bitmap); if (cfg80211_valid_disable_subchannel_bitmap(&bitmap, - &link->conf->chandef)) + &link->conf->chandef) && + !(bitmap && ieee80211_hw_check(&local->hw, DISALLOW_PUNCTURING))) ieee80211_handle_puncturing_bitmap(link, eht_oper, bitmap, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 64352e4e6d00..bbfdcb0ade72 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -920,7 +920,7 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) * Drivers always need to pass packets that are aligned to two-byte boundaries * to the stack. * - * Additionally, should, if possible, align the payload data in a way that + * Additionally, they should, if possible, align the payload data in a way that * guarantees that the contained IP header is aligned to a four-byte * boundary. In the case of regular frames, this simply means aligning the * payload to a four-byte boundary (because either the IP header is directly @@ -936,7 +936,7 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) * subframe to a length that is a multiple of four. * * Padding like Atheros hardware adds which is between the 802.11 header and - * the payload is not supported, the driver is required to move the 802.11 + * the payload is not supported; the driver is required to move the 802.11 * header to be directly in front of the payload in that case. */ static void ieee80211_verify_alignment(struct ieee80211_rx_data *rx) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 24fa06105378..645355e5f1bc 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -194,11 +194,32 @@ ieee80211_bss_info_update(struct ieee80211_local *local, if (scan_sdata && scan_sdata->vif.type == NL80211_IFTYPE_STATION && scan_sdata->vif.cfg.assoc && ieee80211_have_rx_timestamp(rx_status)) { - bss_meta.parent_tsf = - ieee80211_calculate_rx_timestamp(local, rx_status, - len + FCS_LEN, 24); - ether_addr_copy(bss_meta.parent_bssid, - scan_sdata->vif.bss_conf.bssid); + struct ieee80211_bss_conf *link_conf = NULL; + + /* for an MLO connection, set the TSF data only in case we have + * an indication on which of the links the frame was received + */ + if (ieee80211_vif_is_mld(&scan_sdata->vif)) { + if (rx_status->link_valid) { + s8 link_id = rx_status->link_id; + + link_conf = + rcu_dereference(scan_sdata->vif.link_conf[link_id]); + } + } else { + link_conf = &scan_sdata->vif.bss_conf; + } + + if (link_conf) { + bss_meta.parent_tsf = + ieee80211_calculate_rx_timestamp(local, + rx_status, + len + FCS_LEN, + 24); + + ether_addr_copy(bss_meta.parent_bssid, + link_conf->bssid); + } } rcu_read_unlock(); @@ -666,6 +687,21 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, if (local->scan_req) return -EBUSY; + /* For an MLO connection, if a link ID was specified, validate that it + * is indeed active. If no link ID was specified, select one of the + * active links. + */ + if (ieee80211_vif_is_mld(&sdata->vif)) { + if (req->tsf_report_link_id >= 0) { + if (!(sdata->vif.active_links & + BIT(req->tsf_report_link_id))) + return -EINVAL; + } else { + req->tsf_report_link_id = + __ffs(sdata->vif.active_links); + } + } + if (!__ieee80211_can_leave_ch(sdata)) return -EBUSY; @@ -714,6 +750,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, local->hw_scan_req->req.duration = req->duration; local->hw_scan_req->req.duration_mandatory = req->duration_mandatory; + local->hw_scan_req->req.tsf_report_link_id = + req->tsf_report_link_id; local->hw_scan_band = 0; local->hw_scan_req->req.n_6ghz_params = req->n_6ghz_params; @@ -1251,7 +1289,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, iebufsz = local->scan_ies_len + req->ie_len; if (!local->ops->sched_scan_start) - return -ENOTSUPP; + return -EOPNOTSUPP; for (i = 0; i < NUM_NL80211_BANDS; i++) { if (local->hw.wiphy->bands[i]) { @@ -1316,7 +1354,7 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_local *local) lockdep_assert_wiphy(local->hw.wiphy); if (!local->ops->sched_scan_stop) - return -ENOTSUPP; + return -EOPNOTSUPP; /* We don't want to restart sched scan anymore. */ RCU_INIT_POINTER(local->sched_scan_req, NULL); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 0ba613dd1cc4..bf1adcd96b41 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -40,7 +40,7 @@ * either sta_info_insert() or sta_info_insert_rcu(); only in the latter * case (which acquires an rcu read section but must not be called from * within one) will the pointer still be valid after the call. Note that - * the caller may not do much with the STA info before inserting it, in + * the caller may not do much with the STA info before inserting it; in * particular, it may not start any mesh peer link management or add * encryption keys. * @@ -58,7 +58,7 @@ * In order to remove a STA info structure, various sta_info_destroy_*() * calls are available. * - * There is no concept of ownership on a STA entry, each structure is + * There is no concept of ownership on a STA entry; each structure is * owned by the global hash table/list until it is removed. All users of * the structure need to be RCU protected so that the structure won't be * freed before they are done using it. @@ -2268,7 +2268,6 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid, struct ieee80211_local *local = sta->sdata->local; u8 ac = ieee80211_ac_from_tid(tid); u32 airtime = 0; - u32 diff; if (sta->local->airtime_flags & AIRTIME_USE_TX) airtime += tx_airtime; @@ -2279,8 +2278,7 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid, sta->airtime[ac].tx_airtime += tx_airtime; sta->airtime[ac].rx_airtime += rx_airtime; - diff = (u32)jiffies - sta->airtime[ac].last_active; - if (diff <= AIRTIME_ACTIVE_DURATION) + if (ieee80211_sta_keep_active(sta, ac)) sta->airtime[ac].deficit -= airtime; spin_unlock_bh(&local->active_txq_lock[ac]); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 7acf2223e47a..5ef1554f991f 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -138,7 +138,7 @@ enum ieee80211_agg_stop_reason { struct airtime_info { u64 rx_airtime; u64 tx_airtime; - u32 last_active; + unsigned long last_active; s32 deficit; atomic_t aql_tx_pending; /* Estimated airtime for frames pending */ u32 aql_limit_low; diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 05a7dff69fe9..49730b424141 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -1001,7 +1001,7 @@ ieee80211_tdls_build_mgmt_packet_data(struct ieee80211_sub_if_data *sdata, skb); break; default: - ret = -ENOTSUPP; + ret = -EOPNOTSUPP; break; } @@ -1071,7 +1071,7 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, /* any value is ok */ break; default: - ret = -ENOTSUPP; + ret = -EOPNOTSUPP; break; } @@ -1177,7 +1177,7 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, smps_mode != IEEE80211_SMPS_OFF) { tdls_dbg(sdata, "Aborting TDLS setup due to SMPS mode %d\n", smps_mode); - return -ENOTSUPP; + return -EOPNOTSUPP; } lockdep_assert_wiphy(local->hw.wiphy); @@ -1289,7 +1289,7 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, int ret; if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) - return -ENOTSUPP; + return -EOPNOTSUPP; /* make sure we are in managed mode, and associated */ if (sdata->vif.type != NL80211_IFTYPE_STATION || @@ -1446,7 +1446,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, lockdep_assert_wiphy(local->hw.wiphy); if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EINVAL; @@ -1459,7 +1459,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, case NL80211_TDLS_SETUP: case NL80211_TDLS_DISCOVERY_REQ: /* We don't support in-driver setup/teardown/discovery */ - return -ENOTSUPP; + return -EOPNOTSUPP; } /* protect possible bss_conf changes and avoid concurrency in @@ -1510,7 +1510,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, return ret; break; default: - return -ENOTSUPP; + return -EOPNOTSUPP; } if (ether_addr_equal(sdata->u.mgd.tdls_peer, peer)) { @@ -1673,7 +1673,7 @@ ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev, if (!test_sta_flag(sta, WLAN_STA_TDLS_CHAN_SWITCH)) { tdls_dbg(sdata, "TDLS channel switch unsupported by %pM\n", addr); - ret = -ENOTSUPP; + ret = -EOPNOTSUPP; goto out; } @@ -1993,7 +1993,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, if (!sta->sta.deflink.ht_cap.ht_supported && elems->sec_chan_offs && elems->sec_chan_offs->sec_chan_offs) { tdls_dbg(sdata, "TDLS chan switch - wide chan unsupported\n"); - ret = -ENOTSUPP; + ret = -EOPNOTSUPP; goto out; } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ed4fdf655343..314998fdb1a5 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4013,14 +4013,13 @@ ieee80211_txq_set_active(struct txq_info *txqi) return; sta = container_of(txqi->txq.sta, struct sta_info, sta); - sta->airtime[txqi->txq.ac].last_active = (u32)jiffies; + sta->airtime[txqi->txq.ac].last_active = jiffies; } static bool ieee80211_txq_keep_active(struct txq_info *txqi) { struct sta_info *sta; - u32 diff; if (!txqi->txq.sta) return false; @@ -4029,9 +4028,7 @@ ieee80211_txq_keep_active(struct txq_info *txqi) if (ieee80211_sta_deficit(sta, txqi->txq.ac) >= 0) return false; - diff = (u32)jiffies - sta->airtime[txqi->txq.ac].last_active; - - return diff <= AIRTIME_ACTIVE_DURATION; + return ieee80211_sta_keep_active(sta, txqi->txq.ac); } struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac) diff --git a/net/mac80211/wbrf.c b/net/mac80211/wbrf.c new file mode 100644 index 000000000000..a05c5b971789 --- /dev/null +++ b/net/mac80211/wbrf.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Wifi Band Exclusion Interface for WLAN + * Copyright (C) 2023 Advanced Micro Devices + * + */ + +#include <linux/acpi_amd_wbrf.h> +#include <linux/units.h> +#include <net/cfg80211.h> +#include "ieee80211_i.h" + +void ieee80211_check_wbrf_support(struct ieee80211_local *local) +{ + struct wiphy *wiphy = local->hw.wiphy; + struct device *dev; + + if (!wiphy) + return; + + dev = wiphy->dev.parent; + if (!dev) + return; + + local->wbrf_supported = acpi_amd_wbrf_supported_producer(dev); + dev_dbg(dev, "WBRF is %s supported\n", + local->wbrf_supported ? "" : "not"); +} + +static void get_chan_freq_boundary(u32 center_freq, u32 bandwidth, u64 *start, u64 *end) +{ + bandwidth *= KHZ_PER_MHZ; + center_freq *= KHZ_PER_MHZ; + + *start = center_freq - bandwidth / 2; + *end = center_freq + bandwidth / 2; + + /* Frequency in Hz is expected */ + *start = *start * HZ_PER_KHZ; + *end = *end * HZ_PER_KHZ; +} + +static void get_ranges_from_chandef(struct cfg80211_chan_def *chandef, + struct wbrf_ranges_in_out *ranges_in) +{ + u64 start_freq1, end_freq1; + u64 start_freq2, end_freq2; + int bandwidth; + + bandwidth = nl80211_chan_width_to_mhz(chandef->width); + + get_chan_freq_boundary(chandef->center_freq1, bandwidth, &start_freq1, &end_freq1); + + ranges_in->band_list[0].start = start_freq1; + ranges_in->band_list[0].end = end_freq1; + ranges_in->num_of_ranges = 1; + + if (chandef->width == NL80211_CHAN_WIDTH_80P80) { + get_chan_freq_boundary(chandef->center_freq2, bandwidth, &start_freq2, &end_freq2); + + ranges_in->band_list[1].start = start_freq2; + ranges_in->band_list[1].end = end_freq2; + ranges_in->num_of_ranges++; + } +} + +void ieee80211_add_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef) +{ + struct wbrf_ranges_in_out ranges_in = {0}; + struct device *dev; + + if (!local->wbrf_supported) + return; + + dev = local->hw.wiphy->dev.parent; + + get_ranges_from_chandef(chandef, &ranges_in); + + acpi_amd_wbrf_add_remove(dev, WBRF_RECORD_ADD, &ranges_in); +} + +void ieee80211_remove_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef) +{ + struct wbrf_ranges_in_out ranges_in = {0}; + struct device *dev; + + if (!local->wbrf_supported) + return; + + dev = local->hw.wiphy->dev.parent; + + get_ranges_from_chandef(chandef, &ranges_in); + + acpi_amd_wbrf_add_remove(dev, WBRF_RECORD_REMOVE, &ranges_in); +} diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c index a2325e70ddab..670da7822e6c 100644 --- a/net/mptcp/mptcp_pm_gen.c +++ b/net/mptcp/mptcp_pm_gen.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) /* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/mptcp.yaml */ +/* Documentation/netlink/specs/mptcp_pm.yaml */ /* YNL-GEN kernel source */ #include <net/netlink.h> diff --git a/net/mptcp/mptcp_pm_gen.h b/net/mptcp/mptcp_pm_gen.h index 10579d184587..ac9fc7225b6a 100644 --- a/net/mptcp/mptcp_pm_gen.h +++ b/net/mptcp/mptcp_pm_gen.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ /* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/mptcp.yaml */ +/* Documentation/netlink/specs/mptcp_pm.yaml */ /* YNL-GEN kernel header */ #ifndef _LINUX_MPTCP_PM_GEN_H diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 5c01b9bc619a..efecbe3cf415 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -276,12 +276,12 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) if (!mptcp_pm_is_userspace(msk)) { GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); - goto remove_err; + goto out; } if (id_val == 0) { err = mptcp_userspace_pm_remove_id_zero_address(msk, info); - goto remove_err; + goto out; } lock_sock(sk); @@ -296,7 +296,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) if (!match) { GENL_SET_ERR_MSG(info, "address with specified id not found"); release_sock(sk); - goto remove_err; + goto out; } list_move(&match->list, &free_list); @@ -310,7 +310,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) } err = 0; - remove_err: +out: sock_put(sk); return err; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index aa1a93fe40ff..1240268f9e9e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1075,6 +1075,15 @@ static inline void __mptcp_do_fallback(struct mptcp_sock *msk) set_bit(MPTCP_FALLBACK_DONE, &msk->flags); } +static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk) +{ + struct sock *ssk = READ_ONCE(msk->first); + + return ssk && ((1 << inet_sk_state_load(ssk)) & + (TCPF_ESTABLISHED | TCPF_SYN_SENT | + TCPF_SYN_RECV | TCPF_LISTEN)); +} + static inline void mptcp_do_fallback(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 353680733700..cabe856b2a45 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -938,6 +938,8 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) info->mptcpi_bytes_sent = msk->bytes_sent; info->mptcpi_bytes_received = msk->bytes_received; info->mptcpi_bytes_retrans = msk->bytes_retrans; + info->mptcpi_subflows_total = info->mptcpi_subflows + + __mptcp_has_initial_subflow(msk); unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 03757e76bb6b..374412ed780b 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -105,8 +105,11 @@ enum { struct ncsi_channel_version { - u32 version; /* Supported BCD encoded NCSI version */ - u32 alpha2; /* Supported BCD encoded NCSI version */ + u8 major; /* NCSI version major */ + u8 minor; /* NCSI version minor */ + u8 update; /* NCSI version update */ + char alpha1; /* NCSI version alpha1 */ + char alpha2; /* NCSI version alpha2 */ u8 fw_name[12]; /* Firmware name string */ u32 fw_version; /* Firmware version */ u16 pci_ids[4]; /* PCI identification */ diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index fd2236ee9a79..b3ff37a181d7 100644 --- a/net/ncsi/ncsi-cmd.c +++ b/net/ncsi/ncsi-cmd.c @@ -270,7 +270,8 @@ static struct ncsi_cmd_handler { { NCSI_PKT_CMD_GPS, 0, ncsi_cmd_handler_default }, { NCSI_PKT_CMD_OEM, -1, ncsi_cmd_handler_oem }, { NCSI_PKT_CMD_PLDM, 0, NULL }, - { NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default } + { NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_GMCMA, 0, ncsi_cmd_handler_default } }; static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index d9da942ad53d..745c788f1d1d 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -689,8 +689,6 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, return 0; } -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) - static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca) { unsigned char data[NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN]; @@ -716,10 +714,6 @@ static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca) return ret; } -#endif - -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) - /* NCSI OEM Command APIs */ static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca) { @@ -856,8 +850,6 @@ static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id) return nch->handler(nca); } -#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ - /* Determine if a given channel from the channel_queue should be used for Tx */ static bool ncsi_channel_is_tx(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc) @@ -1039,20 +1031,23 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) goto error; } - nd->state = ncsi_dev_state_config_oem_gma; + nd->state = IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) + ? ncsi_dev_state_config_oem_gma + : ncsi_dev_state_config_clear_vids; break; case ncsi_dev_state_config_oem_gma: nd->state = ncsi_dev_state_config_clear_vids; - ret = -1; -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) - nca.type = NCSI_PKT_CMD_OEM; nca.package = np->id; nca.channel = nc->id; ndp->pending_req_num = 1; - ret = ncsi_gma_handler(&nca, nc->version.mf_id); -#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ - + if (nc->version.major >= 1 && nc->version.minor >= 2) { + nca.type = NCSI_PKT_CMD_GMCMA; + ret = ncsi_xmit_cmd(&nca); + } else { + nca.type = NCSI_PKT_CMD_OEM; + ret = ncsi_gma_handler(&nca, nc->version.mf_id); + } if (ret < 0) schedule_work(&ndp->work); @@ -1404,7 +1399,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) schedule_work(&ndp->work); break; -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) case ncsi_dev_state_probe_mlx_gma: ndp->pending_req_num = 1; @@ -1429,7 +1423,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_cis; break; -#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ case ncsi_dev_state_probe_cis: ndp->pending_req_num = NCSI_RESERVED_CHANNEL; @@ -1447,7 +1440,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)) nd->state = ncsi_dev_state_probe_keep_phy; break; -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) case ncsi_dev_state_probe_keep_phy: ndp->pending_req_num = 1; @@ -1460,7 +1452,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_gvi; break; -#endif /* CONFIG_NCSI_OEM_CMD_KEEP_PHY */ case ncsi_dev_state_probe_gvi: case ncsi_dev_state_probe_gc: case ncsi_dev_state_probe_gls: diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index a3a6753a1db7..2f872d064396 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -71,8 +71,8 @@ static int ncsi_write_channel_info(struct sk_buff *skb, if (nc == nc->package->preferred_channel) nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED); - nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version); - nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2); + nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.major); + nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.minor); nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name); vid_nest = nla_nest_start_noflag(skb, NCSI_CHANNEL_ATTR_VLAN_LIST); diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h index ba66c7dc3a21..f2f3b5c1b941 100644 --- a/net/ncsi/ncsi-pkt.h +++ b/net/ncsi/ncsi-pkt.h @@ -197,9 +197,12 @@ struct ncsi_rsp_gls_pkt { /* Get Version ID */ struct ncsi_rsp_gvi_pkt { struct ncsi_rsp_pkt_hdr rsp; /* Response header */ - __be32 ncsi_version; /* NCSI version */ + unsigned char major; /* NCSI version major */ + unsigned char minor; /* NCSI version minor */ + unsigned char update; /* NCSI version update */ + unsigned char alpha1; /* NCSI version alpha1 */ unsigned char reserved[3]; /* Reserved */ - unsigned char alpha2; /* NCSI version */ + unsigned char alpha2; /* NCSI version alpha2 */ unsigned char fw_name[12]; /* f/w name string */ __be32 fw_version; /* f/w version */ __be16 pci_ids[4]; /* PCI IDs */ @@ -335,6 +338,14 @@ struct ncsi_rsp_gpuuid_pkt { __be32 checksum; }; +/* Get MC MAC Address */ +struct ncsi_rsp_gmcma_pkt { + struct ncsi_rsp_pkt_hdr rsp; + unsigned char address_count; + unsigned char reserved[3]; + unsigned char addresses[][ETH_ALEN]; +}; + /* AEN: Link State Change */ struct ncsi_aen_lsc_pkt { struct ncsi_aen_pkt_hdr aen; /* AEN header */ @@ -395,6 +406,7 @@ struct ncsi_aen_hncdsc_pkt { #define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */ #define NCSI_PKT_CMD_QPNPR 0x56 /* Query Pending NC PLDM request */ #define NCSI_PKT_CMD_SNPR 0x57 /* Send NC PLDM Reply */ +#define NCSI_PKT_CMD_GMCMA 0x58 /* Get MC MAC Address */ /* NCSI packet responses */ @@ -430,6 +442,7 @@ struct ncsi_aen_hncdsc_pkt { #define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80) #define NCSI_PKT_RSP_QPNPR (NCSI_PKT_CMD_QPNPR + 0x80) #define NCSI_PKT_RSP_SNPR (NCSI_PKT_CMD_SNPR + 0x80) +#define NCSI_PKT_RSP_GMCMA (NCSI_PKT_CMD_GMCMA + 0x80) /* NCSI response code/reason */ #define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */ diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 069c2659074b..bee290d0f48b 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -19,6 +19,19 @@ #include "ncsi-pkt.h" #include "ncsi-netlink.h" +/* Nibbles within [0xA, 0xF] add zero "0" to the returned value. + * Optional fields (encoded as 0xFF) will default to zero. + */ +static u8 decode_bcd_u8(u8 x) +{ + int lo = x & 0xF; + int hi = x >> 4; + + lo = lo < 0xA ? lo : 0; + hi = hi < 0xA ? hi : 0; + return lo + hi * 10; +} + static int ncsi_validate_rsp_pkt(struct ncsi_request *nr, unsigned short payload) { @@ -755,9 +768,18 @@ static int ncsi_rsp_handler_gvi(struct ncsi_request *nr) if (!nc) return -ENODEV; - /* Update to channel's version info */ + /* Update channel's version info + * + * Major, minor, and update fields are supposed to be + * unsigned integers encoded as packed BCD. + * + * Alpha1 and alpha2 are ISO/IEC 8859-1 characters. + */ ncv = &nc->version; - ncv->version = ntohl(rsp->ncsi_version); + ncv->major = decode_bcd_u8(rsp->major); + ncv->minor = decode_bcd_u8(rsp->minor); + ncv->update = decode_bcd_u8(rsp->update); + ncv->alpha1 = rsp->alpha1; ncv->alpha2 = rsp->alpha2; memcpy(ncv->fw_name, rsp->fw_name, 12); ncv->fw_version = ntohl(rsp->fw_version); @@ -1069,6 +1091,44 @@ static int ncsi_rsp_handler_netlink(struct ncsi_request *nr) return ret; } +static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr) +{ + struct ncsi_dev_priv *ndp = nr->ndp; + struct net_device *ndev = ndp->ndev.dev; + struct ncsi_rsp_gmcma_pkt *rsp; + struct sockaddr saddr; + int ret = -1; + int i; + + rsp = (struct ncsi_rsp_gmcma_pkt *)skb_network_header(nr->rsp); + saddr.sa_family = ndev->type; + ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + + netdev_info(ndev, "NCSI: Received %d provisioned MAC addresses\n", + rsp->address_count); + for (i = 0; i < rsp->address_count; i++) { + netdev_info(ndev, "NCSI: MAC address %d: %02x:%02x:%02x:%02x:%02x:%02x\n", + i, rsp->addresses[i][0], rsp->addresses[i][1], + rsp->addresses[i][2], rsp->addresses[i][3], + rsp->addresses[i][4], rsp->addresses[i][5]); + } + + for (i = 0; i < rsp->address_count; i++) { + memcpy(saddr.sa_data, &rsp->addresses[i], ETH_ALEN); + ret = ndev->netdev_ops->ndo_set_mac_address(ndev, &saddr); + if (ret < 0) { + netdev_warn(ndev, "NCSI: Unable to assign %pM to device\n", + saddr.sa_data); + continue; + } + netdev_warn(ndev, "NCSI: Set MAC address to %pM\n", saddr.sa_data); + break; + } + + ndp->gma_flag = ret == 0; + return ret; +} + static struct ncsi_rsp_handler { unsigned char type; int payload; @@ -1105,7 +1165,8 @@ static struct ncsi_rsp_handler { { NCSI_PKT_RSP_PLDM, -1, ncsi_rsp_handler_pldm }, { NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid }, { NCSI_PKT_RSP_QPNPR, -1, ncsi_rsp_handler_pldm }, - { NCSI_PKT_RSP_SNPR, -1, ncsi_rsp_handler_pldm } + { NCSI_PKT_RSP_SNPR, -1, ncsi_rsp_handler_pldm }, + { NCSI_PKT_RSP_GMCMA, -1, ncsi_rsp_handler_gmcma }, }; int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index eaf9f2ed0067..be74c0906dda 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1365,7 +1365,7 @@ static int set_mcast_if(struct sock *sk, struct net_device *dev) struct ipv6_pinfo *np = inet6_sk(sk); /* IPV6_MULTICAST_IF */ - np->mcast_oif = dev->ifindex; + WRITE_ONCE(np->mcast_oif, dev->ifindex); } #endif release_sock(sk); diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 467671f2d42f..fbbc4fd37349 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -617,7 +617,7 @@ synproxy_recv_client_ack(struct net *net, struct synproxy_net *snet = synproxy_pernet(net); int mss; - mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1); + mss = __cookie_v4_check(ip_hdr(skb), th); if (mss == 0) { this_cpu_inc(snet->stats->cookie_invalid); return false; @@ -1034,7 +1034,7 @@ synproxy_recv_client_ack_ipv6(struct net *net, struct synproxy_net *snet = synproxy_pernet(net); int mss; - mss = nf_cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1); + mss = nf_cookie_v6_check(ipv6_hdr(skb), th); if (mss == 0) { this_cpu_inc(snet->stats->cookie_invalid); return false; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index eb086b06d60d..4ed8ffd58ff3 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1204,8 +1204,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp) return sock; } -static struct sk_buff *netlink_alloc_large_skb(unsigned int size, - int broadcast) +struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast) { struct sk_buff *skb; void *data; @@ -1520,8 +1519,7 @@ out: int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, u32 group, gfp_t allocation, - int (*filter)(struct sock *dsk, - struct sk_buff *skb, void *data), + netlink_filter_fn filter, void *filter_data) { struct net *net = sock_net(ssk); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 9c7ffd10df2a..c0d15470a10b 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -631,6 +631,138 @@ static int genl_validate_ops(const struct genl_family *family) return 0; } +static void *genl_sk_priv_alloc(struct genl_family *family) +{ + void *priv; + + priv = kzalloc(family->sock_priv_size, GFP_KERNEL); + if (!priv) + return ERR_PTR(-ENOMEM); + + if (family->sock_priv_init) + family->sock_priv_init(priv); + + return priv; +} + +static void genl_sk_priv_free(const struct genl_family *family, void *priv) +{ + if (family->sock_priv_destroy) + family->sock_priv_destroy(priv); + kfree(priv); +} + +static int genl_sk_privs_alloc(struct genl_family *family) +{ + if (!family->sock_priv_size) + return 0; + + family->sock_privs = kzalloc(sizeof(*family->sock_privs), GFP_KERNEL); + if (!family->sock_privs) + return -ENOMEM; + xa_init(family->sock_privs); + return 0; +} + +static void genl_sk_privs_free(const struct genl_family *family) +{ + unsigned long id; + void *priv; + + if (!family->sock_priv_size) + return; + + xa_for_each(family->sock_privs, id, priv) + genl_sk_priv_free(family, priv); + + xa_destroy(family->sock_privs); + kfree(family->sock_privs); +} + +static void genl_sk_priv_free_by_sock(struct genl_family *family, + struct sock *sk) +{ + void *priv; + + if (!family->sock_priv_size) + return; + priv = xa_erase(family->sock_privs, (unsigned long) sk); + if (!priv) + return; + genl_sk_priv_free(family, priv); +} + +static void genl_release(struct sock *sk, unsigned long *groups) +{ + struct genl_family *family; + unsigned int id; + + down_read(&cb_lock); + + idr_for_each_entry(&genl_fam_idr, family, id) + genl_sk_priv_free_by_sock(family, sk); + + up_read(&cb_lock); +} + +/** + * __genl_sk_priv_get - Get family private pointer for socket, if exists + * + * @family: family + * @sk: socket + * + * Lookup a private memory for a Generic netlink family and specified socket. + * + * Caller should make sure this is called in RCU read locked section. + * + * Return: valid pointer on success, otherwise negative error value + * encoded by ERR_PTR(), NULL in case priv does not exist. + */ +void *__genl_sk_priv_get(struct genl_family *family, struct sock *sk) +{ + if (WARN_ON_ONCE(!family->sock_privs)) + return ERR_PTR(-EINVAL); + return xa_load(family->sock_privs, (unsigned long) sk); +} + +/** + * genl_sk_priv_get - Get family private pointer for socket + * + * @family: family + * @sk: socket + * + * Lookup a private memory for a Generic netlink family and specified socket. + * Allocate the private memory in case it was not already done. + * + * Return: valid pointer on success, otherwise negative error value + * encoded by ERR_PTR(). + */ +void *genl_sk_priv_get(struct genl_family *family, struct sock *sk) +{ + void *priv, *old_priv; + + priv = __genl_sk_priv_get(family, sk); + if (priv) + return priv; + + /* priv for the family does not exist so far, create it. */ + + priv = genl_sk_priv_alloc(family); + if (IS_ERR(priv)) + return ERR_CAST(priv); + + old_priv = xa_cmpxchg(family->sock_privs, (unsigned long) sk, NULL, + priv, GFP_KERNEL); + if (old_priv) { + genl_sk_priv_free(family, priv); + if (xa_is_err(old_priv)) + return ERR_PTR(xa_err(old_priv)); + /* Race happened, priv for the socket was already inserted. */ + return old_priv; + } + return priv; +} + /** * genl_register_family - register a generic netlink family * @family: generic netlink family @@ -659,6 +791,10 @@ int genl_register_family(struct genl_family *family) goto errout_locked; } + err = genl_sk_privs_alloc(family); + if (err) + goto errout_locked; + /* * Sadly, a few cases need to be special-cased * due to them having previously abused the API @@ -679,7 +815,7 @@ int genl_register_family(struct genl_family *family) start, end + 1, GFP_KERNEL); if (family->id < 0) { err = family->id; - goto errout_locked; + goto errout_sk_privs_free; } err = genl_validate_assign_mc_groups(family); @@ -698,6 +834,8 @@ int genl_register_family(struct genl_family *family) errout_remove: idr_remove(&genl_fam_idr, family->id); +errout_sk_privs_free: + genl_sk_privs_free(family); errout_locked: genl_unlock_all(); return err; @@ -728,6 +866,9 @@ int genl_unregister_family(const struct genl_family *family) up_write(&cb_lock); wait_event(genl_sk_destructing_waitq, atomic_read(&genl_sk_destructing_cnt) == 0); + + genl_sk_privs_free(family); + genl_unlock(); genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); @@ -1708,6 +1849,7 @@ static int __net_init genl_pernet_init(struct net *net) .input = genl_rcv, .flags = NL_CFG_F_NONROOT_RECV, .bind = genl_bind, + .release = genl_release, }; /* we'll bump the group number right afterwards */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 7adf48549a3b..5f1757a32842 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2121,13 +2121,13 @@ static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb, static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { + enum skb_drop_reason drop_reason = SKB_CONSUMED; struct sock *sk; struct sockaddr_ll *sll; struct packet_sock *po; u8 *skb_head = skb->data; int skb_len = skb->len; unsigned int snaplen, res; - bool is_drop_n_account = false; if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -2217,9 +2217,9 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, return 0; drop_n_acct: - is_drop_n_account = true; atomic_inc(&po->tp_drops); atomic_inc(&sk->sk_drops); + drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR; drop_n_restore: if (skb_head != skb->data && skb_shared(skb)) { @@ -2227,16 +2227,14 @@ drop_n_restore: skb->len = skb_len; } drop: - if (!is_drop_n_account) - consume_skb(skb); - else - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return 0; } static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { + enum skb_drop_reason drop_reason = SKB_CONSUMED; struct sock *sk; struct packet_sock *po; struct sockaddr_ll *sll; @@ -2250,7 +2248,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct sk_buff *copy_skb = NULL; struct timespec64 ts; __u32 ts_status; - bool is_drop_n_account = false; unsigned int slot_id = 0; int vnet_hdr_sz = 0; @@ -2498,19 +2495,16 @@ drop_n_restore: skb->len = skb_len; } drop: - if (!is_drop_n_account) - consume_skb(skb); - else - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return 0; drop_n_account: spin_unlock(&sk->sk_receive_queue.lock); atomic_inc(&po->tp_drops); - is_drop_n_account = true; + drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR; sk->sk_data_ready(sk); - kfree_skb(copy_skb); + kfree_skb_reason(copy_skb, drop_reason); goto drop_n_restore; } diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 53b3535a1e4a..05008ce5c421 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -165,7 +165,7 @@ int rds_tcp_accept_one(struct socket *sock) struct ipv6_pinfo *inet6; inet6 = inet6_sk(new_sock->sk); - dev_if = inet6->mcast_oif; + dev_if = READ_ONCE(inet6->mcast_oif); } else { dev_if = new_sock->sk->sk_bound_dev_if; } diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 14cc8fe8584b..c3feb4f49d09 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -1351,11 +1351,11 @@ static long rfkill_fop_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct rfkill_data *data = file->private_data; - int ret = -ENOSYS; + int ret = -ENOTTY; u32 size; if (_IOC_TYPE(cmd) != RFKILL_IOC_MAGIC) - return -ENOSYS; + return -ENOTTY; mutex_lock(&data->mtx); switch (_IOC_NR(cmd)) { diff --git a/net/sched/act_api.c b/net/sched/act_api.c index c39252d61ebb..a44c097a880d 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -816,6 +816,9 @@ EXPORT_SYMBOL(tcf_idr_cleanup); * its reference and bind counters, and return 1. Otherwise insert temporary * error pointer (to prevent concurrent users from inserting actions with same * index) and return 0. + * + * May return -EAGAIN for binding actions in case of a parallel add/delete on + * the requested index. */ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, @@ -824,43 +827,61 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, struct tcf_idrinfo *idrinfo = tn->idrinfo; struct tc_action *p; int ret; + u32 max; -again: - mutex_lock(&idrinfo->lock); if (*index) { +again: + rcu_read_lock(); p = idr_find(&idrinfo->action_idr, *index); + if (IS_ERR(p)) { /* This means that another process allocated * index but did not assign the pointer yet. */ - mutex_unlock(&idrinfo->lock); + rcu_read_unlock(); goto again; } - if (p) { - refcount_inc(&p->tcfa_refcnt); - if (bind) - atomic_inc(&p->tcfa_bindcnt); - *a = p; - ret = 1; - } else { - *a = NULL; - ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index, - *index, GFP_KERNEL); - if (!ret) - idr_replace(&idrinfo->action_idr, - ERR_PTR(-EBUSY), *index); + if (!p) { + /* Empty slot, try to allocate it */ + max = *index; + rcu_read_unlock(); + goto new; + } + + if (!refcount_inc_not_zero(&p->tcfa_refcnt)) { + /* Action was deleted in parallel */ + rcu_read_unlock(); + return -EAGAIN; } + + if (bind) + atomic_inc(&p->tcfa_bindcnt); + *a = p; + + rcu_read_unlock(); + + return 1; } else { + /* Find a slot */ *index = 1; - *a = NULL; - ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index, - UINT_MAX, GFP_KERNEL); - if (!ret) - idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY), - *index); + max = UINT_MAX; } + +new: + *a = NULL; + + mutex_lock(&idrinfo->lock); + ret = idr_alloc_u32(&idrinfo->action_idr, ERR_PTR(-EBUSY), index, max, + GFP_KERNEL); mutex_unlock(&idrinfo->lock); + + /* N binds raced for action allocation, + * retry for all the ones that failed. + */ + if (ret == -ENOSPC && *index == max) + ret = -EAGAIN; + return ret; } EXPORT_SYMBOL(tcf_idr_check_alloc); @@ -1098,7 +1119,8 @@ repeat: } } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { if (unlikely(!rcu_access_pointer(a->goto_chain))) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_CHAIN_NOTFOUND); return TC_ACT_SHOT; } tcf_action_goto_chain_exec(a, res); @@ -1118,8 +1140,7 @@ int tcf_action_destroy(struct tc_action *actions[], int bind) struct tc_action *a; int ret = 0, i; - for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) { - a = actions[i]; + tcf_act_for_each_action(i, a, actions) { actions[i] = NULL; ops = a->ops; ret = __tcf_idr_release(a, bind, true); @@ -1136,18 +1157,29 @@ static int tcf_action_put(struct tc_action *p) return __tcf_action_put(p, false); } -/* Put all actions in this array, skip those NULL's. */ static void tcf_action_put_many(struct tc_action *actions[]) { + struct tc_action *a; + int i; + + tcf_act_for_each_action(i, a, actions) { + const struct tc_action_ops *ops = a->ops; + if (tcf_action_put(a)) + module_put(ops->owner); + } +} + +static void tca_put_bound_many(struct tc_action *actions[], int init_res[]) +{ + struct tc_action *a; int i; - for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { - struct tc_action *a = actions[i]; - const struct tc_action_ops *ops; + tcf_act_for_each_action(i, a, actions) { + const struct tc_action_ops *ops = a->ops; - if (!a) + if (init_res[i] == ACT_P_CREATED) continue; - ops = a->ops; + if (tcf_action_put(a)) module_put(ops->owner); } @@ -1211,8 +1243,7 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int err = -EINVAL, i; struct nlattr *nest; - for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) { - a = actions[i]; + tcf_act_for_each_action(i, a, actions) { nest = nla_nest_start_noflag(skb, i + 1); if (nest == NULL) goto nla_put_failure; @@ -1274,21 +1305,20 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; -void tcf_idr_insert_many(struct tc_action *actions[]) +void tcf_idr_insert_many(struct tc_action *actions[], int init_res[]) { + struct tc_action *a; int i; - for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { - struct tc_action *a = actions[i]; + tcf_act_for_each_action(i, a, actions) { struct tcf_idrinfo *idrinfo; - if (!a) + if (init_res[i] == 0) /* Bound */ continue; + idrinfo = a->idrinfo; mutex_lock(&idrinfo->lock); - /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc if - * it is just created, otherwise this is just a nop. - */ + /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ idr_replace(&idrinfo->action_idr, a, a->tcfa_index); mutex_unlock(&idrinfo->lock); } @@ -1488,7 +1518,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, /* We have to commit them all together, because if any error happened in * between, we could not handle the failure gracefully. */ - tcf_idr_insert_many(actions); + tcf_idr_insert_many(actions, init_res); *attr_size = tcf_action_full_attrs_size(sz); err = i - 1; @@ -1497,10 +1527,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, err: tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND); err_mod: - for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { - if (ops[i]) - module_put(ops[i]->owner); - } + for (i = 0; i < TCA_ACT_MAX_PRIO && ops[i]; i++) + module_put(ops[i]->owner); return err; } @@ -1753,10 +1781,10 @@ err_out: static int tcf_action_delete(struct net *net, struct tc_action *actions[]) { + struct tc_action *a; int i; - for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) { - struct tc_action *a = actions[i]; + tcf_act_for_each_action(i, a, actions) { const struct tc_action_ops *ops = a->ops; /* Actions can be deleted concurrently so we must save their * type and id to search again after reference is released. @@ -1768,7 +1796,7 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[]) if (tcf_action_put(a)) { /* last reference, action was deleted concurrently */ module_put(ops->owner); - } else { + } else { int ret; /* now do the delete */ @@ -1780,31 +1808,45 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[]) return 0; } -static int -tcf_reoffload_del_notify(struct net *net, struct tc_action *action) +static struct sk_buff *tcf_reoffload_del_notify_msg(struct net *net, + struct tc_action *action) { size_t attr_size = tcf_action_fill_size(action); struct tc_action *actions[TCA_ACT_MAX_PRIO] = { [0] = action, }; - const struct tc_action_ops *ops = action->ops; struct sk_buff *skb; - int ret; - skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size, - GFP_KERNEL); + skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL); if (!skb) - return -ENOBUFS; + return ERR_PTR(-ENOBUFS); if (tca_get_fill(skb, actions, 0, 0, 0, RTM_DELACTION, 0, 1, NULL) <= 0) { kfree_skb(skb); - return -EINVAL; + return ERR_PTR(-EINVAL); + } + + return skb; +} + +static int tcf_reoffload_del_notify(struct net *net, struct tc_action *action) +{ + const struct tc_action_ops *ops = action->ops; + struct sk_buff *skb; + int ret; + + if (!rtnl_notify_needed(net, 0, RTNLGRP_TC)) { + skb = NULL; + } else { + skb = tcf_reoffload_del_notify_msg(net, action); + if (IS_ERR(skb)) + return PTR_ERR(skb); } ret = tcf_idr_release_unsafe(action); if (ret == ACT_P_DELETED) { module_put(ops->owner); - ret = rtnetlink_send(skb, net, 0, RTNLGRP_TC, 0); + ret = rtnetlink_maybe_send(skb, net, 0, RTNLGRP_TC, 0); } else { kfree_skb(skb); } @@ -1870,23 +1912,41 @@ int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb, return 0; } -static int -tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], - u32 portid, size_t attr_size, struct netlink_ext_ack *extack) +static struct sk_buff *tcf_del_notify_msg(struct net *net, struct nlmsghdr *n, + struct tc_action *actions[], + u32 portid, size_t attr_size, + struct netlink_ext_ack *extack) { - int ret; struct sk_buff *skb; - skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size, - GFP_KERNEL); + skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL); if (!skb) - return -ENOBUFS; + return ERR_PTR(-ENOBUFS); if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION, 0, 2, extack) <= 0) { NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes"); kfree_skb(skb); - return -EINVAL; + return ERR_PTR(-EINVAL); + } + + return skb; +} + +static int tcf_del_notify(struct net *net, struct nlmsghdr *n, + struct tc_action *actions[], u32 portid, + size_t attr_size, struct netlink_ext_ack *extack) +{ + struct sk_buff *skb; + int ret; + + if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) { + skb = NULL; + } else { + skb = tcf_del_notify_msg(net, n, actions, portid, attr_size, + extack); + if (IS_ERR(skb)) + return PTR_ERR(skb); } /* now do the delete */ @@ -1897,9 +1957,8 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], return ret; } - ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); - return ret; + return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); } static int @@ -1950,26 +2009,44 @@ err: return ret; } -static int -tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], - u32 portid, size_t attr_size, struct netlink_ext_ack *extack) +static struct sk_buff *tcf_add_notify_msg(struct net *net, struct nlmsghdr *n, + struct tc_action *actions[], + u32 portid, size_t attr_size, + struct netlink_ext_ack *extack) { struct sk_buff *skb; - skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size, - GFP_KERNEL); + skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL); if (!skb) - return -ENOBUFS; + return ERR_PTR(-ENOBUFS); if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags, RTM_NEWACTION, 0, 0, extack) <= 0) { NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action"); kfree_skb(skb); - return -EINVAL; + return ERR_PTR(-EINVAL); } - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); + return skb; +} + +static int tcf_add_notify(struct net *net, struct nlmsghdr *n, + struct tc_action *actions[], u32 portid, + size_t attr_size, struct netlink_ext_ack *extack) +{ + struct sk_buff *skb; + + if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) { + skb = NULL; + } else { + skb = tcf_add_notify_msg(net, n, actions, portid, attr_size, + extack); + if (IS_ERR(skb)) + return PTR_ERR(skb); + } + + return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); } static int tcf_action_add(struct net *net, struct nlattr *nla, @@ -1977,7 +2054,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, struct netlink_ext_ack *extack) { size_t attr_size = 0; - int loop, ret, i; + int loop, ret; struct tc_action *actions[TCA_ACT_MAX_PRIO] = {}; int init_res[TCA_ACT_MAX_PRIO] = {}; @@ -1990,13 +2067,11 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, if (ret < 0) return ret; + ret = tcf_add_notify(net, n, actions, portid, attr_size, extack); - /* only put existing actions */ - for (i = 0; i < TCA_ACT_MAX_PRIO; i++) - if (init_res[i] == ACT_P_CREATED) - actions[i] = NULL; - tcf_action_put_many(actions); + /* only put bound actions */ + tca_put_bound_many(actions, init_res); return ret; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 1976bd163986..8978cf5531d0 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -650,7 +650,7 @@ static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, void *tmplt_priv, u32 chain_index, struct tcf_block *block, struct sk_buff *oskb, - u32 seq, u16 flags, bool unicast); + u32 seq, u16 flags); static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, bool explicitly_created) @@ -685,8 +685,7 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, if (non_act_refcnt == chain->explicitly_created && !by_act) { if (non_act_refcnt == 0) tc_chain_notify_delete(tmplt_ops, tmplt_priv, - chain->index, block, NULL, 0, 0, - false); + chain->index, block, NULL, 0, 0); /* Last reference to chain, no need to lock. */ chain->flushing = false; } @@ -1658,7 +1657,6 @@ static inline int __tcf_classify(struct sk_buff *skb, int act_index, u32 *last_executed_chain) { - u32 orig_reason = res->drop_reason; #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 16; const struct tcf_proto *first_tp; @@ -1683,13 +1681,15 @@ reclassify: */ if (unlikely(n->tp != tp || n->tp->chain != n->chain || !tp->ops->get_exts)) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } exts = tp->ops->get_exts(tp, n->handle); if (unlikely(!exts || n->exts != exts)) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } @@ -1713,18 +1713,13 @@ reclassify: goto reset; } #endif - if (err >= 0) { - /* Policy drop or drop reason is over-written by - * classifiers with a bogus value(0) */ - if (err == TC_ACT_SHOT && - res->drop_reason == SKB_NOT_DROPPED_YET) - tcf_set_drop_reason(res, orig_reason); + if (err >= 0) return err; - } } if (unlikely(n)) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } @@ -1736,7 +1731,8 @@ reset: tp->chain->block->index, tp->prio & 0xffff, ntohs(tp->protocol)); - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_RECLASSIFY_LOOP); return TC_ACT_SHOT; } @@ -1774,7 +1770,8 @@ int tcf_classify(struct sk_buff *skb, n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie, &act_index); if (!n) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_COOKIE_ERROR); return TC_ACT_SHOT; } @@ -1785,7 +1782,9 @@ int tcf_classify(struct sk_buff *skb, fchain = tcf_chain_lookup_rcu(block, chain); if (!fchain) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, + SKB_DROP_REASON_TC_CHAIN_NOTFOUND); + return TC_ACT_SHOT; } @@ -1807,10 +1806,9 @@ int tcf_classify(struct sk_buff *skb, ext = tc_skb_ext_alloc(skb); if (WARN_ON_ONCE(!ext)) { - tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); + tcf_set_drop_reason(skb, SKB_DROP_REASON_NOMEM); return TC_ACT_SHOT; } - ext->chain = last_executed_chain; ext->mru = cb->mru; ext->post_ct = cb->post_ct; @@ -2053,6 +2051,9 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; int err = 0; + if (!unicast && !rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) + return 0; + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -2075,13 +2076,16 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, - u32 parent, void *fh, bool unicast, bool *last, - bool rtnl_held, struct netlink_ext_ack *extack) + u32 parent, void *fh, bool *last, bool rtnl_held, + struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; int err; + if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) + return tp->ops->delete(tp, fh, last, rtnl_held, extack); + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -2100,11 +2104,8 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, return err; } - if (unicast) - err = rtnl_unicast(skb, net, portid); - else - err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send filter delete notification"); @@ -2499,9 +2500,8 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, } else { bool last; - err = tfilter_del_notify(net, skb, n, tp, block, - q, parent, fh, false, &last, - rtnl_held, extack); + err = tfilter_del_notify(net, skb, n, tp, block, q, parent, fh, + &last, rtnl_held, extack); if (err) goto errout; @@ -2906,6 +2906,9 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, struct sk_buff *skb; int err = 0; + if (!unicast && !rtnl_notify_needed(net, flags, RTNLGRP_TC)) + return 0; + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -2929,12 +2932,15 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, void *tmplt_priv, u32 chain_index, struct tcf_block *block, struct sk_buff *oskb, - u32 seq, u16 flags, bool unicast) + u32 seq, u16 flags) { u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; struct net *net = block->net; struct sk_buff *skb; + if (!rtnl_notify_needed(net, flags, RTNLGRP_TC)) + return 0; + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; @@ -2945,9 +2951,6 @@ static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, return -EINVAL; } - if (unicast) - return rtnl_unicast(skb, net, portid); - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); } @@ -3309,7 +3312,7 @@ int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr ** act->type = exts->type = TCA_OLD_COMPAT; exts->actions[0] = act; exts->nr_actions = 1; - tcf_idr_insert_many(exts->actions); + tcf_idr_insert_many(exts->actions, init_res); } else if (exts->action && tb[exts->action]) { int err; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index d5bdfd4a7655..289e1755c26b 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -71,7 +71,7 @@ struct tc_u_hnode { struct tc_u_hnode __rcu *next; u32 handle; u32 prio; - int refcnt; + refcount_t refcnt; unsigned int divisor; struct idr handle_idr; bool is_root; @@ -86,7 +86,7 @@ struct tc_u_hnode { struct tc_u_common { struct tc_u_hnode __rcu *hlist; void *ptr; - int refcnt; + refcount_t refcnt; struct idr handle_idr; struct hlist_node hnode; long knodes; @@ -359,7 +359,7 @@ static int u32_init(struct tcf_proto *tp) if (root_ht == NULL) return -ENOBUFS; - root_ht->refcnt++; + refcount_set(&root_ht->refcnt, 1); root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000; root_ht->prio = tp->prio; root_ht->is_root = true; @@ -371,18 +371,20 @@ static int u32_init(struct tcf_proto *tp) kfree(root_ht); return -ENOBUFS; } + refcount_set(&tp_c->refcnt, 1); tp_c->ptr = key; INIT_HLIST_NODE(&tp_c->hnode); idr_init(&tp_c->handle_idr); hlist_add_head(&tp_c->hnode, tc_u_hash(key)); + } else { + refcount_inc(&tp_c->refcnt); } - tp_c->refcnt++; RCU_INIT_POINTER(root_ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, root_ht); - root_ht->refcnt++; + /* root_ht must be destroyed when tcf_proto is destroyed */ rcu_assign_pointer(tp->root, root_ht); tp->data = tp_c; return 0; @@ -393,7 +395,7 @@ static void __u32_destroy_key(struct tc_u_knode *n) struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); tcf_exts_destroy(&n->exts); - if (ht && --ht->refcnt == 0) + if (ht && refcount_dec_and_test(&ht->refcnt)) kfree(ht); kfree(n); } @@ -601,8 +603,6 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, struct tc_u_hnode __rcu **hn; struct tc_u_hnode *phn; - WARN_ON(--ht->refcnt); - u32_clear_hnode(tp, ht, extack); hn = &tp_c->hlist; @@ -630,10 +630,10 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held, WARN_ON(root_ht == NULL); - if (root_ht && --root_ht->refcnt == 1) + if (root_ht && refcount_dec_and_test(&root_ht->refcnt)) u32_destroy_hnode(tp, root_ht, extack); - if (--tp_c->refcnt == 0) { + if (refcount_dec_and_test(&tp_c->refcnt)) { struct tc_u_hnode *ht; hlist_del(&tp_c->hnode); @@ -645,7 +645,7 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held, /* u32_destroy_key() will later free ht for us, if it's * still referenced by some knode */ - if (--ht->refcnt == 0) + if (refcount_dec_and_test(&ht->refcnt)) kfree_rcu(ht, rcu); } @@ -674,7 +674,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, return -EINVAL; } - if (ht->refcnt == 1) { + if (refcount_dec_if_one(&ht->refcnt)) { u32_destroy_hnode(tp, ht, extack); } else { NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter"); @@ -682,7 +682,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, } out: - *last = tp_c->refcnt == 1 && tp_c->knodes == 0; + *last = refcount_read(&tp_c->refcnt) == 1 && tp_c->knodes == 0; return ret; } @@ -766,14 +766,14 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, NL_SET_ERR_MSG_MOD(extack, "Not linking to root node"); return -EINVAL; } - ht_down->refcnt++; + refcount_inc(&ht_down->refcnt); } ht_old = rtnl_dereference(n->ht_down); rcu_assign_pointer(n->ht_down, ht_down); if (ht_old) - ht_old->refcnt--; + refcount_dec(&ht_old->refcnt); } if (ifindex >= 0) @@ -852,7 +852,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, /* bump reference count as long as we hold pointer to structure */ if (ht) - ht->refcnt++; + refcount_inc(&ht->refcnt); return new; } @@ -932,7 +932,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ht_old = rtnl_dereference(n->ht_down); if (ht_old) - ht_old->refcnt++; + refcount_inc(&ht_old->refcnt); } __u32_destroy_key(new); return err; @@ -980,7 +980,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return err; } } - ht->refcnt = 1; + refcount_set(&ht->refcnt, 1); ht->divisor = divisor; ht->handle = handle; ht->prio = tp->prio; diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 9a0b85190a2c..beece8e82c23 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -57,6 +57,8 @@ #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> +#include <linux/units.h> + #include <net/netevent.h> #include <net/netlink.h> #include <net/sch_generic.h> @@ -65,8 +67,6 @@ static LIST_HEAD(cbs_list); static DEFINE_SPINLOCK(cbs_list_lock); -#define BYTES_PER_KBIT (1000LL / 8) - struct cbs_sched_data { bool offload; int queue; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 4195a4bc26ca..8dd0e5925342 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -522,8 +522,9 @@ static void dev_watchdog(struct timer_list *t) if (unlikely(timedout_ms)) { trace_net_dev_xmit_timeout(dev, i); - WARN_ONCE(1, "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out %u ms\n", - dev->name, netdev_drivername(dev), i, timedout_ms); + netdev_crit(dev, "NETDEV WATCHDOG: CPU: %d: transmit queue %u timed out %u ms\n", + raw_smp_processor_id(), + i, timedout_ms); netif_freeze_queues(dev); dev->netdev_ops->ndo_tx_timeout(dev, i); netif_unfreeze_queues(dev); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7f89e43154c0..5fb02bbb4b34 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2099,6 +2099,9 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, pr_debug("%s: sk:%p, msghdr:%p, len:%zd, flags:0x%x, addr_len:%p)\n", __func__, sk, msg, len, flags, addr_len); + if (unlikely(flags & MSG_ERRQUEUE)) + return inet_recv_error(sk, msg, len, addr_len); + lock_sock(sk); if (sctp_style(sk, TCP) && !sctp_sstate(sk, ESTABLISHED) && diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 73eebddbbf41..7fc2f3c6d248 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -2461,7 +2461,7 @@ static void smc_listen_work(struct work_struct *work) if (rc) goto out_decl; - rc = smc_clc_srv_v2x_features_validate(pclc, ini); + rc = smc_clc_srv_v2x_features_validate(new_smc, pclc, ini); if (rc) goto out_decl; diff --git a/net/smc/smc.h b/net/smc/smc.h index e377980b8414..cd51261b7d9e 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -196,7 +196,6 @@ struct smc_connection { * - dec on polled tx cqe */ wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/ - atomic_t tx_pushing; /* nr_threads trying tx push */ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ u32 tx_off; /* base offset in peer rmb */ diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 72f4d81a3f41..95e19aa3e769 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -824,6 +824,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) struct smc_clc_smcd_gid_chid *gidchids; struct smc_clc_msg_proposal_area *pclc; struct smc_clc_ipv6_prefix *ipv6_prfx; + struct net *net = sock_net(&smc->sk); struct smc_clc_v2_extension *v2_ext; struct smc_clc_msg_smcd *pclc_smcd; struct smc_clc_msg_trail *trl; @@ -943,8 +944,8 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) } if (smcr_indicated(ini->smc_type_v2)) { memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE); - v2_ext->max_conns = SMC_CONN_PER_LGR_PREFER; - v2_ext->max_links = SMC_LINKS_PER_LGR_MAX_PREFER; + v2_ext->max_conns = net->smc.sysctl_max_conns_per_lgr; + v2_ext->max_links = net->smc.sysctl_max_links_per_lgr; } pclc_base->hdr.length = htons(plen); @@ -1170,10 +1171,12 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, return len > 0 ? 0 : len; } -int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc, +int smc_clc_srv_v2x_features_validate(struct smc_sock *smc, + struct smc_clc_msg_proposal *pclc, struct smc_init_info *ini) { struct smc_clc_v2_extension *pclc_v2_ext; + struct net *net = sock_net(&smc->sk); ini->max_conns = SMC_CONN_PER_LGR_MAX; ini->max_links = SMC_LINKS_ADD_LNK_MAX; @@ -1187,11 +1190,13 @@ int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc, return SMC_CLC_DECL_NOV2EXT; if (ini->smcr_version & SMC_V2) { - ini->max_conns = min_t(u8, pclc_v2_ext->max_conns, SMC_CONN_PER_LGR_PREFER); + ini->max_conns = min_t(u8, pclc_v2_ext->max_conns, + net->smc.sysctl_max_conns_per_lgr); if (ini->max_conns < SMC_CONN_PER_LGR_MIN) return SMC_CLC_DECL_MAXCONNERR; - ini->max_links = min_t(u8, pclc_v2_ext->max_links, SMC_LINKS_PER_LGR_MAX_PREFER); + ini->max_links = min_t(u8, pclc_v2_ext->max_links, + net->smc.sysctl_max_links_per_lgr); if (ini->max_links < SMC_LINKS_ADD_LNK_MIN) return SMC_CLC_DECL_MAXLINKERR; } diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 08155a96a02a..1697b84d85be 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -422,7 +422,8 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, u8 version, u8 *eid, struct smc_init_info *ini); int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact, u8 version, u8 *negotiated_eid, struct smc_init_info *ini); -int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc, +int smc_clc_srv_v2x_features_validate(struct smc_sock *smc, + struct smc_clc_msg_proposal *pclc, struct smc_init_info *ini); int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce, struct smc_init_info *ini); diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 5cbc18c6e62b..a5946d1b9d60 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -25,6 +25,10 @@ static int max_sndbuf = INT_MAX / 2; static int max_rcvbuf = INT_MAX / 2; static const int net_smc_wmem_init = (64 * 1024); static const int net_smc_rmem_init = (64 * 1024); +static int links_per_lgr_min = SMC_LINKS_ADD_LNK_MIN; +static int links_per_lgr_max = SMC_LINKS_ADD_LNK_MAX; +static int conns_per_lgr_min = SMC_CONN_PER_LGR_MIN; +static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX; static struct ctl_table smc_table[] = { { @@ -68,6 +72,24 @@ static struct ctl_table smc_table[] = { .extra1 = &min_rcvbuf, .extra2 = &max_rcvbuf, }, + { + .procname = "smcr_max_links_per_lgr", + .data = &init_net.smc.sysctl_max_links_per_lgr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &links_per_lgr_min, + .extra2 = &links_per_lgr_max, + }, + { + .procname = "smcr_max_conns_per_lgr", + .data = &init_net.smc.sysctl_max_conns_per_lgr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &conns_per_lgr_min, + .extra2 = &conns_per_lgr_max, + }, { } }; @@ -97,6 +119,8 @@ int __net_init smc_sysctl_net_init(struct net *net) net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME; WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init); WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init); + net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; + net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER; return 0; diff --git a/net/smc/smc_sysctl.h b/net/smc/smc_sysctl.h index 0becc11bd2f4..eb2465ae1e15 100644 --- a/net/smc/smc_sysctl.h +++ b/net/smc/smc_sysctl.h @@ -23,6 +23,8 @@ void __net_exit smc_sysctl_net_exit(struct net *net); static inline int smc_sysctl_net_init(struct net *net) { net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; + net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; + net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER; return 0; } diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 3b0ff3b589c7..214ac3cbcf9a 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -621,7 +621,7 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn) return rc; } -static int __smc_tx_sndbuf_nonempty(struct smc_connection *conn) +int smc_tx_sndbuf_nonempty(struct smc_connection *conn) { struct smc_sock *smc = container_of(conn, struct smc_sock, conn); int rc = 0; @@ -655,34 +655,6 @@ out: return rc; } -int smc_tx_sndbuf_nonempty(struct smc_connection *conn) -{ - int rc; - - /* This make sure only one can send simultaneously to prevent wasting - * of CPU and CDC slot. - * Record whether someone has tried to push while we are pushing. - */ - if (atomic_inc_return(&conn->tx_pushing) > 1) - return 0; - -again: - atomic_set(&conn->tx_pushing, 1); - smp_wmb(); /* Make sure tx_pushing is 1 before real send */ - rc = __smc_tx_sndbuf_nonempty(conn); - - /* We need to check whether someone else have added some data into - * the send queue and tried to push but failed after the atomic_set() - * when we are pushing. - * If so, we need to push again to prevent those data hang in the send - * queue. - */ - if (unlikely(!atomic_dec_and_test(&conn->tx_pushing))) - goto again; - - return rc; -} - /* Wakeup sndbuf consumers from process context * since there is more data to transmit. The caller * must hold sock lock. diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index c763008a8adb..079aebb16ed8 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -168,7 +168,7 @@ static struct sk_buff *tipc_get_err_tlv(char *str) int str_len = strlen(str) + 1; struct sk_buff *buf; - buf = tipc_tlv_alloc(TLV_SPACE(str_len)); + buf = tipc_tlv_alloc(str_len); if (buf) tipc_add_tlv(buf, TIPC_TLV_ERROR_STRING, str, str_len); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 816725af281f..54ba7316f808 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -2264,8 +2264,13 @@ static int vsock_set_rcvlowat(struct sock *sk, int val) transport = vsk->transport; - if (transport && transport->set_rcvlowat) - return transport->set_rcvlowat(vsk, val); + if (transport && transport->notify_set_rcvlowat) { + int err; + + err = transport->notify_set_rcvlowat(vsk, val); + if (err) + return err; + } WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); return 0; diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 7cb1a9d2cdb4..e2157e387217 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -816,7 +816,7 @@ int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written, } static -int hvs_set_rcvlowat(struct vsock_sock *vsk, int val) +int hvs_notify_set_rcvlowat(struct vsock_sock *vsk, int val) { return -EOPNOTSUPP; } @@ -856,7 +856,7 @@ static struct vsock_transport hvs_transport = { .notify_send_pre_enqueue = hvs_notify_send_pre_enqueue, .notify_send_post_enqueue = hvs_notify_send_post_enqueue, - .set_rcvlowat = hvs_set_rcvlowat + .notify_set_rcvlowat = hvs_notify_set_rcvlowat }; static bool hvs_check_transport(struct vsock_sock *vsk) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index af5bab1acee1..f495b9e5186b 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -537,6 +537,7 @@ static struct virtio_transport virtio_transport = { .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, .notify_buffer_size = virtio_transport_notify_buffer_size, + .notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat, .read_skb = virtio_transport_read_skb, }, diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 6df246b53260..16ff976a86e3 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -557,6 +557,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, struct virtio_vsock_sock *vvs = vsk->trans; size_t bytes, total = 0; struct sk_buff *skb; + u32 fwd_cnt_delta; + bool low_rx_bytes; int err = -EFAULT; u32 free_space; @@ -600,7 +602,10 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, } } - free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt); + fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt; + free_space = vvs->buf_alloc - fwd_cnt_delta; + low_rx_bytes = (vvs->rx_bytes < + sock_rcvlowat(sk_vsock(vsk), 0, INT_MAX)); spin_unlock_bh(&vvs->rx_lock); @@ -610,9 +615,11 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, * too high causes extra messages. Too low causes transmitter * stalls. As stalls are in theory more expensive than extra * messages, we set the limit to a high value. TODO: experiment - * with different values. + * with different values. Also send credit update message when + * number of bytes in rx queue is not enough to wake up reader. */ - if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) + if (fwd_cnt_delta && + (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE || low_rx_bytes)) virtio_transport_send_credit_update(vsk); return total; @@ -1683,6 +1690,36 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto } EXPORT_SYMBOL_GPL(virtio_transport_read_skb); +int virtio_transport_notify_set_rcvlowat(struct vsock_sock *vsk, int val) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + bool send_update; + + spin_lock_bh(&vvs->rx_lock); + + /* If number of available bytes is less than new SO_RCVLOWAT value, + * kick sender to send more data, because sender may sleep in its + * 'send()' syscall waiting for enough space at our side. Also + * don't send credit update when peer already knows actual value - + * such transmission will be useless. + */ + send_update = (vvs->rx_bytes < val) && + (vvs->fwd_cnt != vvs->last_fwd_cnt); + + spin_unlock_bh(&vvs->rx_lock); + + if (send_update) { + int err; + + err = virtio_transport_send_credit_update(vsk); + if (err < 0) + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(virtio_transport_notify_set_rcvlowat); + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Asias He"); MODULE_DESCRIPTION("common code for virtio vsock"); diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c index 048640167411..6dea6119f5b2 100644 --- a/net/vmw_vsock/vsock_loopback.c +++ b/net/vmw_vsock/vsock_loopback.c @@ -96,6 +96,7 @@ static struct virtio_transport loopback_transport = { .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue, .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue, .notify_buffer_size = virtio_transport_notify_buffer_size, + .notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat, .read_skb = virtio_transport_read_skb, }, diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 089c841528c8..72074fd36df4 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -25,7 +25,7 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),) cfg80211-y += extra-certs.o endif -$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex) +$(obj)/shipped-certs.c: $(sort $(wildcard $(srctree)/$(src)/certs/*.hex)) @$(kecho) " GEN $@" $(Q)(echo '#include "reg.h"'; \ echo 'const u8 shipped_regdb_certs[] = {'; \ @@ -35,7 +35,7 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex) ) > $@ $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR) \ - $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR)/*.x509) + $(sort $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR)/*.x509)) @$(kecho) " GEN $@" $(Q)(set -e; \ allf=""; \ diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 2d21e423abdb..dfb4893421d7 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -141,7 +141,7 @@ static bool cfg80211_edmg_chandef_valid(const struct cfg80211_chan_def *chandef) return true; } -static int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width) +int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width) { int mhz; @@ -190,6 +190,7 @@ static int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width) } return mhz; } +EXPORT_SYMBOL(nl80211_chan_width_to_mhz); static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c) { diff --git a/net/wireless/core.h b/net/wireless/core.h index cb61d33d4f1e..1963958263d2 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -458,6 +458,9 @@ int cfg80211_scan(struct cfg80211_registered_device *rdev); extern struct work_struct cfg80211_disconnect_work; +#define NL80211_BSS_USE_FOR_ALL (NL80211_BSS_USE_FOR_NORMAL | \ + NL80211_BSS_USE_FOR_MLD_LINK) + void cfg80211_set_dfs_state(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_dfs_state dfs_state); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index bad9e4fd842f..f635a8b6ca2e 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -22,7 +22,7 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, - struct cfg80211_rx_assoc_resp_data *data) + const struct cfg80211_rx_assoc_resp_data *data) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1cbbb11ea503..8b45fb420f4c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -818,6 +818,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_HW_TIMESTAMP_ENABLED] = { .type = NLA_FLAG }, [NL80211_ATTR_EMA_RNR_ELEMS] = { .type = NLA_NESTED }, [NL80211_ATTR_MLO_LINK_DISABLED] = { .type = NLA_FLAG }, + [NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA] = { .type = NLA_FLAG }, + [NL80211_ATTR_MLO_TTLM_DLINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8), + [NL80211_ATTR_MLO_TTLM_ULINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8), }; /* policy for the key attributes */ @@ -4855,7 +4858,7 @@ static struct cfg80211_acl_data *parse_acl_data(struct wiphy *wiphy, return ERR_PTR(n_entries); if (n_entries > wiphy->max_acl_mac_addrs) - return ERR_PTR(-ENOTSUPP); + return ERR_PTR(-EOPNOTSUPP); acl = kzalloc(struct_size(acl, mac_addrs, n_entries), GFP_KERNEL); if (!acl) @@ -9342,6 +9345,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) else eth_broadcast_addr(request->bssid); + request->tsf_report_link_id = nl80211_link_id_or_invalid(info->attrs); request->wdev = wdev; request->wiphy = &rdev->wiphy; request->scan_start = jiffies; @@ -10409,6 +10413,15 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, break; } + if (nla_put_u32(msg, NL80211_BSS_USE_FOR, res->use_for)) + goto nla_put_failure; + + if (res->cannot_use_reasons && + nla_put_u64_64bit(msg, NL80211_BSS_CANNOT_USE_REASONS, + res->cannot_use_reasons, + NL80211_BSS_PAD)) + goto nla_put_failure; + nla_nest_end(msg, bss); genlmsg_end(msg, hdr); @@ -10426,15 +10439,27 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) struct cfg80211_registered_device *rdev; struct cfg80211_internal_bss *scan; struct wireless_dev *wdev; + struct nlattr **attrbuf; int start = cb->args[2], idx = 0; + bool dump_include_use_data; int err; - err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL); - if (err) + attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL); + if (!attrbuf) + return -ENOMEM; + + err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, attrbuf); + if (err) { + kfree(attrbuf); return err; + } /* nl80211_prepare_wdev_dump acquired it in the successful case */ __acquire(&rdev->wiphy.mtx); + dump_include_use_data = + attrbuf[NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA]; + kfree(attrbuf); + spin_lock_bh(&rdev->bss_lock); /* @@ -10451,6 +10476,9 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) list_for_each_entry(scan, &rdev->bss_list, list) { if (++idx <= start) continue; + if (!dump_include_use_data && + !(scan->pub.use_for & NL80211_BSS_USE_FOR_NORMAL)) + continue; if (nl80211_send_bss(skb, cb, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev, scan) < 0) { @@ -10902,12 +10930,13 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device *rdev, const u8 *ssid, int ssid_len, - struct nlattr **attrs) + struct nlattr **attrs, + int assoc_link_id, int link_id) { struct ieee80211_channel *chan; struct cfg80211_bss *bss; const u8 *bssid; - u32 freq; + u32 freq, use_for = 0; if (!attrs[NL80211_ATTR_MAC] || !attrs[NL80211_ATTR_WIPHY_FREQ]) return ERR_PTR(-EINVAL); @@ -10922,10 +10951,16 @@ static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device if (!chan) return ERR_PTR(-EINVAL); - bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, - ssid, ssid_len, - IEEE80211_BSS_TYPE_ESS, - IEEE80211_PRIVACY_ANY); + if (assoc_link_id >= 0) + use_for = NL80211_BSS_USE_FOR_MLD_LINK; + if (assoc_link_id == link_id) + use_for |= NL80211_BSS_USE_FOR_NORMAL; + + bss = __cfg80211_get_bss(&rdev->wiphy, chan, bssid, + ssid, ssid_len, + IEEE80211_BSS_TYPE_ESS, + IEEE80211_PRIVACY_ANY, + use_for); if (!bss) return ERR_PTR(-ENOENT); @@ -11104,7 +11139,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) goto free; } req.links[link_id].bss = - nl80211_assoc_bss(rdev, ssid, ssid_len, attrs); + nl80211_assoc_bss(rdev, ssid, ssid_len, attrs, + req.link_id, link_id); if (IS_ERR(req.links[link_id].bss)) { err = PTR_ERR(req.links[link_id].bss); req.links[link_id].bss = NULL; @@ -11169,7 +11205,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (req.link_id >= 0) return -EINVAL; - req.bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs); + req.bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs, + -1, -1); if (IS_ERR(req.bss)) return PTR_ERR(req.bss); ap_addr = req.bss->bssid; @@ -12178,16 +12215,18 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) return err; } -static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info) +static int nl80211_set_pmksa(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - int (*rdev_ops)(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_pmksa *pmksa) = NULL; struct net_device *dev = info->user_ptr[1]; struct cfg80211_pmksa pmksa; + bool ap_pmksa_caching_support = false; memset(&pmksa, 0, sizeof(struct cfg80211_pmksa)); + ap_pmksa_caching_support = wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_AP_PMKSA_CACHING); + if (!info->attrs[NL80211_ATTR_PMKID]) return -EINVAL; @@ -12196,16 +12235,15 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) { pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); } else if (info->attrs[NL80211_ATTR_SSID] && - info->attrs[NL80211_ATTR_FILS_CACHE_ID] && - (info->genlhdr->cmd == NL80211_CMD_DEL_PMKSA || - info->attrs[NL80211_ATTR_PMK])) { + info->attrs[NL80211_ATTR_FILS_CACHE_ID] && + info->attrs[NL80211_ATTR_PMK]) { pmksa.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); pmksa.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); - pmksa.cache_id = - nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]); + pmksa.cache_id = nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]); } else { return -EINVAL; } + if (info->attrs[NL80211_ATTR_PMK]) { pmksa.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]); pmksa.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]); @@ -12217,32 +12255,71 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD]) pmksa.pmk_reauth_threshold = - nla_get_u8( - info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD]); + nla_get_u8(info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD]); if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && - !(dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP && - wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_AP_PMKSA_CACHING))) + !((dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP || + dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO) && + ap_pmksa_caching_support)) return -EOPNOTSUPP; - switch (info->genlhdr->cmd) { - case NL80211_CMD_SET_PMKSA: - rdev_ops = rdev->ops->set_pmksa; - break; - case NL80211_CMD_DEL_PMKSA: - rdev_ops = rdev->ops->del_pmksa; - break; - default: - WARN_ON(1); - break; + if (!rdev->ops->set_pmksa) + return -EOPNOTSUPP; + + return rdev_set_pmksa(rdev, dev, &pmksa); +} + +static int nl80211_del_pmksa(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct cfg80211_pmksa pmksa; + bool sae_offload_support = false; + bool owe_offload_support = false; + bool ap_pmksa_caching_support = false; + + memset(&pmksa, 0, sizeof(struct cfg80211_pmksa)); + + sae_offload_support = wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_SAE_OFFLOAD); + owe_offload_support = wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_OWE_OFFLOAD); + ap_pmksa_caching_support = wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_AP_PMKSA_CACHING); + + if (info->attrs[NL80211_ATTR_PMKID]) + pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]); + + if (info->attrs[NL80211_ATTR_MAC]) { + pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); + } else if (info->attrs[NL80211_ATTR_SSID]) { + /* SSID based pmksa flush suppported only for FILS, + * OWE/SAE OFFLOAD cases + */ + if (info->attrs[NL80211_ATTR_FILS_CACHE_ID] && + info->attrs[NL80211_ATTR_PMK]) { + pmksa.cache_id = nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]); + } else if (!sae_offload_support && !owe_offload_support) { + return -EINVAL; + } + pmksa.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); + pmksa.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); + } else { + return -EINVAL; } - if (!rdev_ops) + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && + !((dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP || + dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO) && + ap_pmksa_caching_support)) + return -EOPNOTSUPP; + + if (!rdev->ops->del_pmksa) return -EOPNOTSUPP; - return rdev_ops(&rdev->wiphy, dev, &pmksa); + return rdev_del_pmksa(rdev, dev, &pmksa); } static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info) @@ -15846,7 +15923,7 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev, if (tid_conf->mask & ~mask) { NL_SET_ERR_MSG(extack, "unsupported TID configuration"); - return -ENOTSUPP; + return -EOPNOTSUPP; } return 0; @@ -16239,6 +16316,35 @@ static int nl80211_set_hw_timestamp(struct sk_buff *skb, return rdev_set_hw_timestamp(rdev, dev, &hwts); } +static int +nl80211_set_ttlm(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_ttlm_params params = {}; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + + if (wdev->iftype != NL80211_IFTYPE_STATION && + wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) + return -EOPNOTSUPP; + + if (!wdev->connected) + return -ENOLINK; + + if (!info->attrs[NL80211_ATTR_MLO_TTLM_DLINK] || + !info->attrs[NL80211_ATTR_MLO_TTLM_ULINK]) + return -EINVAL; + + nla_memcpy(params.dlink, + info->attrs[NL80211_ATTR_MLO_TTLM_DLINK], + sizeof(params.dlink)); + nla_memcpy(params.ulink, + info->attrs[NL80211_ATTR_MLO_TTLM_ULINK], + sizeof(params.ulink)); + + return rdev_set_ttlm(rdev, dev, ¶ms); +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -16927,7 +17033,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { { .cmd = NL80211_CMD_SET_PMKSA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = nl80211_setdel_pmksa, + .doit = nl80211_set_pmksa, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_CLEAR_SKB), @@ -16935,7 +17041,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { { .cmd = NL80211_CMD_DEL_PMKSA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = nl80211_setdel_pmksa, + .doit = nl80211_del_pmksa, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, @@ -17420,6 +17526,12 @@ static const struct genl_small_ops nl80211_small_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, + { + .cmd = NL80211_CMD_SET_TID_TO_LINK_MAPPING, + .doit = nl80211_set_ttlm, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + }, }; static struct genl_family nl80211_fam __ro_after_init = { @@ -17751,21 +17863,29 @@ nla_put_failure: nlmsg_free(msg); } +struct nl80211_mlme_event { + enum nl80211_commands cmd; + const u8 *buf; + size_t buf_len; + int uapsd_queues; + const u8 *req_ies; + size_t req_ies_len; + bool reconnect; +}; + static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, struct net_device *netdev, - const u8 *buf, size_t len, - enum nl80211_commands cmd, gfp_t gfp, - int uapsd_queues, const u8 *req_ies, - size_t req_ies_len, bool reconnect) + const struct nl80211_mlme_event *event, + gfp_t gfp) { struct sk_buff *msg; void *hdr; - msg = nlmsg_new(100 + len + req_ies_len, gfp); + msg = nlmsg_new(100 + event->buf_len + event->req_ies_len, gfp); if (!msg) return; - hdr = nl80211hdr_put(msg, 0, 0, 0, cmd); + hdr = nl80211hdr_put(msg, 0, 0, 0, event->cmd); if (!hdr) { nlmsg_free(msg); return; @@ -17773,22 +17893,24 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || - nla_put(msg, NL80211_ATTR_FRAME, len, buf) || - (req_ies && - nla_put(msg, NL80211_ATTR_REQ_IE, req_ies_len, req_ies))) + nla_put(msg, NL80211_ATTR_FRAME, event->buf_len, event->buf) || + (event->req_ies && + nla_put(msg, NL80211_ATTR_REQ_IE, event->req_ies_len, + event->req_ies))) goto nla_put_failure; - if (reconnect && nla_put_flag(msg, NL80211_ATTR_RECONNECT_REQUESTED)) + if (event->reconnect && + nla_put_flag(msg, NL80211_ATTR_RECONNECT_REQUESTED)) goto nla_put_failure; - if (uapsd_queues >= 0) { + if (event->uapsd_queues >= 0) { struct nlattr *nla_wmm = nla_nest_start_noflag(msg, NL80211_ATTR_STA_WME); if (!nla_wmm) goto nla_put_failure; if (nla_put_u8(msg, NL80211_STA_WME_UAPSD_QUEUES, - uapsd_queues)) + event->uapsd_queues)) goto nla_put_failure; nla_nest_end(msg, nla_wmm); @@ -17808,37 +17930,60 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, gfp_t gfp) { - nl80211_send_mlme_event(rdev, netdev, buf, len, - NL80211_CMD_AUTHENTICATE, gfp, -1, NULL, 0, - false); + struct nl80211_mlme_event event = { + .cmd = NL80211_CMD_AUTHENTICATE, + .buf = buf, + .buf_len = len, + .uapsd_queues = -1, + }; + + nl80211_send_mlme_event(rdev, netdev, &event, gfp); } void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, - struct cfg80211_rx_assoc_resp_data *data) + const struct cfg80211_rx_assoc_resp_data *data) { - nl80211_send_mlme_event(rdev, netdev, data->buf, data->len, - NL80211_CMD_ASSOCIATE, GFP_KERNEL, - data->uapsd_queues, - data->req_ies, data->req_ies_len, false); + struct nl80211_mlme_event event = { + .cmd = NL80211_CMD_ASSOCIATE, + .buf = data->buf, + .buf_len = data->len, + .uapsd_queues = data->uapsd_queues, + .req_ies = data->req_ies, + .req_ies_len = data->req_ies_len, + }; + + nl80211_send_mlme_event(rdev, netdev, &event, GFP_KERNEL); } void nl80211_send_deauth(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, bool reconnect, gfp_t gfp) { - nl80211_send_mlme_event(rdev, netdev, buf, len, - NL80211_CMD_DEAUTHENTICATE, gfp, -1, NULL, 0, - reconnect); + struct nl80211_mlme_event event = { + .cmd = NL80211_CMD_DEAUTHENTICATE, + .buf = buf, + .buf_len = len, + .reconnect = reconnect, + .uapsd_queues = -1, + }; + + nl80211_send_mlme_event(rdev, netdev, &event, gfp); } void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, bool reconnect, gfp_t gfp) { - nl80211_send_mlme_event(rdev, netdev, buf, len, - NL80211_CMD_DISASSOCIATE, gfp, -1, NULL, 0, - reconnect); + struct nl80211_mlme_event event = { + .cmd = NL80211_CMD_DISASSOCIATE, + .buf = buf, + .buf_len = len, + .reconnect = reconnect, + .uapsd_queues = -1, + }; + + nl80211_send_mlme_event(rdev, netdev, &event, gfp); } void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf, @@ -17848,28 +17993,31 @@ void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf, struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); const struct ieee80211_mgmt *mgmt = (void *)buf; - u32 cmd; + struct nl80211_mlme_event event = { + .buf = buf, + .buf_len = len, + .uapsd_queues = -1, + }; if (WARN_ON(len < 2)) return; if (ieee80211_is_deauth(mgmt->frame_control)) { - cmd = NL80211_CMD_UNPROT_DEAUTHENTICATE; + event.cmd = NL80211_CMD_UNPROT_DEAUTHENTICATE; } else if (ieee80211_is_disassoc(mgmt->frame_control)) { - cmd = NL80211_CMD_UNPROT_DISASSOCIATE; + event.cmd = NL80211_CMD_UNPROT_DISASSOCIATE; } else if (ieee80211_is_beacon(mgmt->frame_control)) { if (wdev->unprot_beacon_reported && elapsed_jiffies_msecs(wdev->unprot_beacon_reported) < 10000) return; - cmd = NL80211_CMD_UNPROT_BEACON; + event.cmd = NL80211_CMD_UNPROT_BEACON; wdev->unprot_beacon_reported = jiffies; } else { return; } trace_cfg80211_rx_unprot_mlme_mgmt(dev, buf, len); - nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC, -1, - NULL, 0, false); + nl80211_send_mlme_event(rdev, dev, &event, GFP_ATOMIC); } EXPORT_SYMBOL(cfg80211_rx_unprot_mlme_mgmt); diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index aad40240d9cb..6376f3a87f8a 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -60,7 +60,7 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, const u8 *buf, size_t len, gfp_t gfp); void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, - struct cfg80211_rx_assoc_resp_data *data); + const struct cfg80211_rx_assoc_resp_data *data); void nl80211_send_deauth(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 2214a90cf101..43897a5269b6 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1046,7 +1046,7 @@ rdev_nan_change_conf(struct cfg80211_registered_device *rdev, ret = rdev->ops->nan_change_conf(&rdev->wiphy, wdev, conf, changes); else - ret = -ENOTSUPP; + ret = -EOPNOTSUPP; trace_rdev_return_int(&rdev->wiphy, ret); return ret; } @@ -1200,7 +1200,7 @@ rdev_start_radar_detection(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef, u32 cac_time_ms) { - int ret = -ENOTSUPP; + int ret = -EOPNOTSUPP; trace_rdev_start_radar_detection(&rdev->wiphy, dev, chandef, cac_time_ms); @@ -1226,7 +1226,7 @@ rdev_set_mcast_rate(struct cfg80211_registered_device *rdev, struct net_device *dev, int mcast_rate[NUM_NL80211_BANDS]) { - int ret = -ENOTSUPP; + int ret = -EOPNOTSUPP; trace_rdev_set_mcast_rate(&rdev->wiphy, dev, mcast_rate); if (rdev->ops->set_mcast_rate) @@ -1239,7 +1239,7 @@ static inline int rdev_set_coalesce(struct cfg80211_registered_device *rdev, struct cfg80211_coalesce *coalesce) { - int ret = -ENOTSUPP; + int ret = -EOPNOTSUPP; trace_rdev_set_coalesce(&rdev->wiphy, coalesce); if (rdev->ops->set_coalesce) @@ -1524,4 +1524,22 @@ rdev_set_hw_timestamp(struct cfg80211_registered_device *rdev, return ret; } + +static inline int +rdev_set_ttlm(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_ttlm_params *params) +{ + struct wiphy *wiphy = &rdev->wiphy; + int ret; + + if (!rdev->ops->set_ttlm) + return -EOPNOTSUPP; + + trace_rdev_set_ttlm(wiphy, dev, params); + ret = rdev->ops->set_ttlm(wiphy, dev, params); + trace_rdev_return_int(wiphy, ret); + + return ret; +} #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 9e5ccffd6868..3d260c99c348 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1535,12 +1535,13 @@ static bool cfg80211_bss_type_match(u16 capability, } /* Returned bss is reference counted and must be cleaned up appropriately. */ -struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, - struct ieee80211_channel *channel, - const u8 *bssid, - const u8 *ssid, size_t ssid_len, - enum ieee80211_bss_type bss_type, - enum ieee80211_privacy privacy) +struct cfg80211_bss *__cfg80211_get_bss(struct wiphy *wiphy, + struct ieee80211_channel *channel, + const u8 *bssid, + const u8 *ssid, size_t ssid_len, + enum ieee80211_bss_type bss_type, + enum ieee80211_privacy privacy, + u32 use_for) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_internal_bss *bss, *res = NULL; @@ -1565,6 +1566,8 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, continue; if (!is_valid_ether_addr(bss->pub.bssid)) continue; + if ((bss->pub.use_for & use_for) != use_for) + continue; /* Don't get expired BSS structs */ if (time_after(now, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE) && !atomic_read(&bss->hold)) @@ -1582,7 +1585,7 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, trace_cfg80211_return_bss(&res->pub); return &res->pub; } -EXPORT_SYMBOL(cfg80211_get_bss); +EXPORT_SYMBOL(__cfg80211_get_bss); static void rb_insert_bss(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *bss) @@ -1746,7 +1749,9 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, new->pub.proberesp_ies); if (old) kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); - } else if (rcu_access_pointer(new->pub.beacon_ies)) { + } + + if (rcu_access_pointer(new->pub.beacon_ies)) { const struct cfg80211_bss_ies *old; if (known->pub.hidden_beacon_bss && @@ -1800,6 +1805,8 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, ether_addr_copy(known->parent_bssid, new->parent_bssid); known->pub.max_bssid_indicator = new->pub.max_bssid_indicator; known->pub.bssid_index = new->pub.bssid_index; + known->pub.use_for &= new->pub.use_for; + known->pub.cannot_use_reasons = new->pub.cannot_use_reasons; return true; } @@ -2044,6 +2051,9 @@ struct cfg80211_inform_single_bss_data { struct cfg80211_bss *source_bss; u8 max_bssid_indicator; u8 bssid_index; + + u8 use_for; + u64 cannot_use_reasons; }; /* Returned bss is reference counted and must be cleaned up appropriately. */ @@ -2089,6 +2099,8 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, tmp.ts_boottime = drv_data->boottime_ns; tmp.parent_tsf = drv_data->parent_tsf; ether_addr_copy(tmp.parent_bssid, drv_data->parent_bssid); + tmp.pub.use_for = data->use_for; + tmp.pub.cannot_use_reasons = data->cannot_use_reasons; if (data->bss_source != BSS_SOURCE_DIRECT) { tmp.pub.transmitted_bss = data->source_bss; @@ -2259,6 +2271,8 @@ cfg80211_parse_mbssid_data(struct wiphy *wiphy, .beacon_interval = tx_data->beacon_interval, .source_bss = source_bss, .bss_source = BSS_SOURCE_MBSSID, + .use_for = tx_data->use_for, + .cannot_use_reasons = tx_data->cannot_use_reasons, }; const u8 *mbssid_index_ie; const struct element *elem, *sub; @@ -2521,7 +2535,7 @@ error: return NULL; } -static bool +static u8 cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id, const struct ieee80211_neighbor_ap_info **ap_info, const u8 **tbtt_info) @@ -2540,6 +2554,7 @@ cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id, u16 params; u8 length, i, count, mld_params_offset; u8 type, lid; + u32 use_for; info = (void *)pos; count = u8_get_bits(info->tbtt_info_hdr, @@ -2549,20 +2564,22 @@ cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id, pos += sizeof(*info); if (count * length > end - pos) - return false; + return 0; type = u8_get_bits(info->tbtt_info_hdr, IEEE80211_AP_INFO_TBTT_HDR_TYPE); - /* Only accept full TBTT information. NSTR mobile APs - * use the shortened version, but we ignore them here. - */ if (type == IEEE80211_TBTT_INFO_TYPE_TBTT && length >= offsetofend(struct ieee80211_tbtt_info_ge_11, mld_params)) { mld_params_offset = offsetof(struct ieee80211_tbtt_info_ge_11, mld_params); + use_for = NL80211_BSS_USE_FOR_ALL; + } else if (type == IEEE80211_TBTT_INFO_TYPE_MLD && + length >= sizeof(struct ieee80211_rnr_mld_params)) { + mld_params_offset = 0; + use_for = NL80211_BSS_USE_FOR_MLD_LINK; } else { pos += count * length; continue; @@ -2580,7 +2597,7 @@ cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id, *ap_info = info; *tbtt_info = pos; - return true; + return use_for; } pos += length; @@ -2588,7 +2605,7 @@ cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id, } } - return false; + return 0; } static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy, @@ -2606,6 +2623,7 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy, const struct element *elem; struct cfg80211_mle *mle; u16 control; + u8 ml_common_len; u8 *new_ie; struct cfg80211_bss *bss; int mld_id; @@ -2636,6 +2654,8 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy, !(control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP)) return; + ml_common_len = ml_elem->variable[0]; + /* length + MLD MAC address + link ID info + BSS Params Change Count */ pos = ml_elem->variable + 1 + 6 + 1 + 1; @@ -2676,7 +2696,7 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy, const u8 *profile; const u8 *tbtt_info; ssize_t profile_len; - u8 link_id; + u8 link_id, use_for; if (!ieee80211_mle_basic_sta_prof_size_ok((u8 *)mle->sta_prof[i], mle->sta_prof_len[i])) @@ -2718,9 +2738,11 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy, profile_len -= 2; /* Find in RNR to look up channel information */ - if (!cfg80211_tbtt_info_for_mld_ap(tx_data->ie, tx_data->ielen, - mld_id, link_id, - &ap_info, &tbtt_info)) + use_for = cfg80211_tbtt_info_for_mld_ap(tx_data->ie, + tx_data->ielen, + mld_id, link_id, + &ap_info, &tbtt_info); + if (!use_for) continue; /* We could sanity check the BSSID is included */ @@ -2732,6 +2754,14 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy, freq = ieee80211_channel_to_freq_khz(ap_info->channel, band); data.channel = ieee80211_get_channel_khz(wiphy, freq); + if (use_for == NL80211_BSS_USE_FOR_MLD_LINK && + !(wiphy->flags & WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY)) { + use_for = 0; + data.cannot_use_reasons = + NL80211_BSS_CANNOT_USE_NSTR_NONPRIMARY; + } + data.use_for = use_for; + /* Generate new elements */ memset(new_ie, 0, IEEE80211_MAX_DATA_LEN); data.ie = new_ie; @@ -2742,6 +2772,34 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy, if (!data.ielen) continue; + /* The generated elements do not contain: + * - Basic ML element + * - A TBTT entry in the RNR for the transmitting AP + * + * This information is needed both internally and in userspace + * as such, we should append it here. + */ + if (data.ielen + 3 + sizeof(*ml_elem) + ml_common_len > + IEEE80211_MAX_DATA_LEN) + continue; + + /* Copy the Basic Multi-Link element including the common + * information, and then fix up the link ID. + * Note that the ML element length has been verified and we + * also checked that it contains the link ID. + */ + new_ie[data.ielen++] = WLAN_EID_EXTENSION; + new_ie[data.ielen++] = 1 + sizeof(*ml_elem) + ml_common_len; + new_ie[data.ielen++] = WLAN_EID_EXT_EHT_MULTI_LINK; + memcpy(new_ie + data.ielen, ml_elem, + sizeof(*ml_elem) + ml_common_len); + + new_ie[data.ielen + sizeof(*ml_elem) + 1 + ETH_ALEN] = link_id; + + data.ielen += sizeof(*ml_elem) + ml_common_len; + + /* TODO: Add an RNR containing only the reporting AP */ + bss = cfg80211_inform_single_bss_data(wiphy, &data, gfp); if (!bss) break; @@ -2769,6 +2827,10 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, .beacon_interval = beacon_interval, .ie = ie, .ielen = ielen, + .use_for = data->restrict_use ? + data->use_for : + NL80211_BSS_USE_FOR_ALL, + .cannot_use_reasons = data->cannot_use_reasons, }; struct cfg80211_bss *res; @@ -2899,6 +2961,10 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, tmp.pub.chains = data->chains; memcpy(tmp.pub.chain_signal, data->chain_signal, IEEE80211_MAX_CHAINS); ether_addr_copy(tmp.parent_bssid, data->parent_bssid); + tmp.pub.use_for = data->restrict_use ? + data->use_for : + NL80211_BSS_USE_FOR_ALL; + tmp.pub.cannot_use_reasons = data->cannot_use_reasons; signal_valid = data->chan == channel; spin_lock_bh(&rdev->bss_lock); @@ -2930,6 +2996,10 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, .ie = mgmt->u.probe_resp.variable, .ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable), + .use_for = data->restrict_use ? + data->use_for : + NL80211_BSS_USE_FOR_ALL, + .cannot_use_reasons = data->cannot_use_reasons, }; struct cfg80211_bss *res; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 30cd1bd58aac..1f374c8a17a5 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2928,7 +2928,7 @@ DEFINE_EVENT(netdev_evt_only, cfg80211_send_rx_auth, TRACE_EVENT(cfg80211_send_rx_assoc, TP_PROTO(struct net_device *netdev, - struct cfg80211_rx_assoc_resp_data *data), + const struct cfg80211_rx_assoc_resp_data *data), TP_ARGS(netdev, data), TP_STRUCT__entry( NETDEV_ENTRY @@ -3979,6 +3979,26 @@ TRACE_EVENT(cfg80211_links_removed, __entry->link_mask) ); +TRACE_EVENT(rdev_set_ttlm, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_ttlm_params *params), + TP_ARGS(wiphy, netdev, params), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __array(u8, dlink, sizeof(u16) * 8) + __array(u8, ulink, sizeof(u16) * 8) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + memcpy(__entry->dlink, params->dlink, sizeof(params->dlink)); + memcpy(__entry->ulink, params->ulink, sizeof(params->ulink)); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 06cead2b8e34..caa340134b0e 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -148,6 +148,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem) return 0; } +#define XDP_UMEM_FLAGS_VALID ( \ + XDP_UMEM_UNALIGNED_CHUNK_FLAG | \ + XDP_UMEM_TX_SW_CSUM | \ + 0) + static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) { bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; @@ -167,7 +172,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) return -EINVAL; } - if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG) + if (mr->flags & ~XDP_UMEM_FLAGS_VALID) return -EINVAL; if (!unaligned_chunks && !is_power_of_2(chunk_size)) @@ -199,6 +204,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (headroom >= chunk_size - XDP_PACKET_HEADROOM) return -EINVAL; + if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8) + return -EINVAL; + umem->size = size; umem->headroom = headroom; umem->chunk_size = chunk_size; @@ -207,6 +215,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) umem->pgs = NULL; umem->user = NULL; umem->flags = mr->flags; + umem->tx_metadata_len = mr->tx_metadata_len; INIT_LIST_HEAD(&umem->xsk_dma_list); refcount_set(&umem->users, 1); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 3da0b52f308d..9f13aa3353e3 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -571,6 +571,13 @@ static u32 xsk_get_num_desc(struct sk_buff *skb) static void xsk_destruct_skb(struct sk_buff *skb) { + struct xsk_tx_metadata_compl *compl = &skb_shinfo(skb)->xsk_meta; + + if (compl->tx_timestamp) { + /* sw completion timestamp, not a real one */ + *compl->tx_timestamp = ktime_get_tai_fast_ns(); + } + xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb)); sock_wfree(skb); } @@ -655,8 +662,10 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, struct xdp_desc *desc) { + struct xsk_tx_metadata *meta = NULL; struct net_device *dev = xs->dev; struct sk_buff *skb = xs->skb; + bool first_frag = false; int err; if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) { @@ -687,6 +696,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, kfree_skb(skb); goto free_err; } + + first_frag = true; } else { int nr_frags = skb_shinfo(skb)->nr_frags; struct page *page; @@ -709,12 +720,45 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, skb_add_rx_frag(skb, nr_frags, page, 0, len, 0); } + + if (first_frag && desc->options & XDP_TX_METADATA) { + if (unlikely(xs->pool->tx_metadata_len == 0)) { + err = -EINVAL; + goto free_err; + } + + meta = buffer - xs->pool->tx_metadata_len; + if (unlikely(!xsk_buff_valid_tx_metadata(meta))) { + err = -EINVAL; + goto free_err; + } + + if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) { + if (unlikely(meta->request.csum_start + + meta->request.csum_offset + + sizeof(__sum16) > len)) { + err = -EINVAL; + goto free_err; + } + + skb->csum_start = hr + meta->request.csum_start; + skb->csum_offset = meta->request.csum_offset; + skb->ip_summed = CHECKSUM_PARTIAL; + + if (unlikely(xs->pool->tx_sw_csum)) { + err = skb_checksum_help(skb); + if (err) + goto free_err; + } + } + } } skb->dev = dev; skb->priority = READ_ONCE(xs->sk.sk_priority); skb->mark = READ_ONCE(xs->sk.sk_mark); skb->destructor = xsk_destruct_skb; + xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta); xsk_set_destructor_arg(skb); return skb; @@ -1282,6 +1326,14 @@ struct xdp_umem_reg_v1 { __u32 headroom; }; +struct xdp_umem_reg_v2 { + __u64 addr; /* Start of packet data area */ + __u64 len; /* Length of packet data area */ + __u32 chunk_size; + __u32 headroom; + __u32 flags; +}; + static int xsk_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { @@ -1325,8 +1377,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(struct xdp_umem_reg_v1)) return -EINVAL; - else if (optlen < sizeof(mr)) + else if (optlen < sizeof(struct xdp_umem_reg_v2)) mr_size = sizeof(struct xdp_umem_reg_v1); + else if (optlen < sizeof(mr)) + mr_size = sizeof(struct xdp_umem_reg_v2); if (copy_from_sockptr(&mr, optval, mr_size)) return -EFAULT; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 49cb9f9a09be..28711cc44ced 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -85,6 +85,8 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, XDP_PACKET_HEADROOM; pool->umem = umem; pool->addrs = umem->addrs; + pool->tx_metadata_len = umem->tx_metadata_len; + pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM; INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->xskb_list); INIT_LIST_HEAD(&pool->xsk_tx_list); @@ -123,6 +125,18 @@ void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) } EXPORT_SYMBOL(xp_set_rxq_info); +void xp_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc) +{ + u32 i; + + for (i = 0; i < pool->heads_cnt; i++) { + struct xdp_buff_xsk *xskb = &pool->heads[i]; + + memcpy(xskb->cb + desc->off, desc->src, desc->bytes); + } +} +EXPORT_SYMBOL(xp_fill_cb); + static void xp_disable_drv_zc(struct xsk_buff_pool *pool) { struct netdev_bpf bpf; diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 13354a1e4280..6f2d1621c992 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -137,21 +137,23 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr) static inline bool xp_unused_options_set(u32 options) { - return options & ~XDP_PKT_CONTD; + return options & ~(XDP_PKT_CONTD | XDP_TX_METADATA); } static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 offset = desc->addr & (pool->chunk_size - 1); + u64 addr = desc->addr - pool->tx_metadata_len; + u64 len = desc->len + pool->tx_metadata_len; + u64 offset = addr & (pool->chunk_size - 1); if (!desc->len) return false; - if (offset + desc->len > pool->chunk_size) + if (offset + len > pool->chunk_size) return false; - if (desc->addr >= pool->addrs_cnt) + if (addr >= pool->addrs_cnt) return false; if (xp_unused_options_set(desc->options)) @@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 addr = xp_unaligned_add_offset_to_addr(desc->addr); + u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len; + u64 len = desc->len + pool->tx_metadata_len; if (!desc->len) return false; - if (desc->len > pool->chunk_size) + if (len > pool->chunk_size) return false; - if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt || - xp_desc_crosses_non_contig_pg(pool, addr, desc->len)) + if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt || + xp_desc_crosses_non_contig_pg(pool, addr, len)) return false; if (xp_unused_options_set(desc->options)) diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index cd47f88921f5..547cec77ba03 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -21,3 +21,4 @@ obj-$(CONFIG_XFRM_USER_COMPAT) += xfrm_compat.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o +obj-$(CONFIG_DEBUG_INFO_BTF) += xfrm_state_bpf.o diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c13dc3ef7910..1b7e75159727 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -4218,6 +4218,8 @@ void __init xfrm_init(void) #ifdef CONFIG_XFRM_ESPINTCP espintcp_init(); #endif + + register_xfrm_state_bpf(); } #ifdef CONFIG_AUDITSYSCALL diff --git a/net/xfrm/xfrm_state_bpf.c b/net/xfrm/xfrm_state_bpf.c new file mode 100644 index 000000000000..9e20d4a377f7 --- /dev/null +++ b/net/xfrm/xfrm_state_bpf.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Unstable XFRM state BPF helpers. + * + * Note that it is allowed to break compatibility for these functions since the + * interface they are exposed through to BPF programs is explicitly unstable. + */ + +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/btf_ids.h> +#include <net/xdp.h> +#include <net/xfrm.h> + +/* bpf_xfrm_state_opts - Options for XFRM state lookup helpers + * + * Members: + * @error - Out parameter, set for any errors encountered + * Values: + * -EINVAL - netns_id is less than -1 + * -EINVAL - opts__sz isn't BPF_XFRM_STATE_OPTS_SZ + * -ENONET - No network namespace found for netns_id + * -ENOENT - No xfrm_state found + * @netns_id - Specify the network namespace for lookup + * Values: + * BPF_F_CURRENT_NETNS (-1) + * Use namespace associated with ctx + * [0, S32_MAX] + * Network Namespace ID + * @mark - XFRM mark to match on + * @daddr - Destination address to match on + * @spi - Security parameter index to match on + * @proto - IP protocol to match on (eg. IPPROTO_ESP) + * @family - Protocol family to match on (AF_INET/AF_INET6) + */ +struct bpf_xfrm_state_opts { + s32 error; + s32 netns_id; + u32 mark; + xfrm_address_t daddr; + __be32 spi; + u8 proto; + u16 family; +}; + +enum { + BPF_XFRM_STATE_OPTS_SZ = sizeof(struct bpf_xfrm_state_opts), +}; + +__bpf_kfunc_start_defs(); + +/* bpf_xdp_get_xfrm_state - Get XFRM state + * + * A `struct xfrm_state *`, if found, must be released with a corresponding + * bpf_xdp_xfrm_state_release. + * + * Parameters: + * @ctx - Pointer to ctx (xdp_md) in XDP program + * Cannot be NULL + * @opts - Options for lookup (documented above) + * Cannot be NULL + * @opts__sz - Length of the bpf_xfrm_state_opts structure + * Must be BPF_XFRM_STATE_OPTS_SZ + */ +__bpf_kfunc struct xfrm_state * +bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, u32 opts__sz) +{ + struct xdp_buff *xdp = (struct xdp_buff *)ctx; + struct net *net = dev_net(xdp->rxq->dev); + struct xfrm_state *x; + + if (!opts || opts__sz < sizeof(opts->error)) + return NULL; + + if (opts__sz != BPF_XFRM_STATE_OPTS_SZ) { + opts->error = -EINVAL; + return NULL; + } + + if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS)) { + opts->error = -EINVAL; + return NULL; + } + + if (opts->netns_id >= 0) { + net = get_net_ns_by_id(net, opts->netns_id); + if (unlikely(!net)) { + opts->error = -ENONET; + return NULL; + } + } + + x = xfrm_state_lookup(net, opts->mark, &opts->daddr, opts->spi, + opts->proto, opts->family); + + if (opts->netns_id >= 0) + put_net(net); + if (!x) + opts->error = -ENOENT; + + return x; +} + +/* bpf_xdp_xfrm_state_release - Release acquired xfrm_state object + * + * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects + * the program if any references remain in the program in all of the explored + * states. + * + * Parameters: + * @x - Pointer to referenced xfrm_state object, obtained using + * bpf_xdp_get_xfrm_state. + */ +__bpf_kfunc void bpf_xdp_xfrm_state_release(struct xfrm_state *x) +{ + xfrm_state_put(x); +} + +__bpf_kfunc_end_defs(); + +BTF_SET8_START(xfrm_state_kfunc_set) +BTF_ID_FLAGS(func, bpf_xdp_get_xfrm_state, KF_RET_NULL | KF_ACQUIRE) +BTF_ID_FLAGS(func, bpf_xdp_xfrm_state_release, KF_RELEASE) +BTF_SET8_END(xfrm_state_kfunc_set) + +static const struct btf_kfunc_id_set xfrm_state_xdp_kfunc_set = { + .owner = THIS_MODULE, + .set = &xfrm_state_kfunc_set, +}; + +int __init register_xfrm_state_bpf(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, + &xfrm_state_xdp_kfunc_set); +} |