diff options
Diffstat (limited to 'net')
248 files changed, 8526 insertions, 2100 deletions
diff --git a/net/Kconfig b/net/Kconfig index 5cb9de1aaf88..62da6148e9f8 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -403,7 +403,7 @@ config LWTUNNEL config LWTUNNEL_BPF bool "Execute BPF program as route nexthop action" - depends on LWTUNNEL + depends on LWTUNNEL && INET default y if LWTUNNEL=y ---help--- Allows to run BPF programs as a nexthop action following a route diff --git a/net/atm/proc.c b/net/atm/proc.c index 0b0495a41bbe..d79221fd4dae 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -134,7 +134,8 @@ static void vcc_seq_stop(struct seq_file *seq, void *v) static void *vcc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { v = vcc_walk(seq, 1); - *pos += !!PTR_ERR(v); + if (v) + (*pos)++; return v; } diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index c386e6981416..a31db5e9ac8e 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -# Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +# Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: # # Marek Lindner, Simon Wunderlich # diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 9b58160fe485..a887ecc3efa1 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -# Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +# Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: # # Marek Lindner, Simon Wunderlich # diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c index ea309ad06175..7b7e15641fef 100644 --- a/net/batman-adv/bat_algo.c +++ b/net/batman-adv/bat_algo.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h index 534b790c3753..25e7bb51928c 100644 --- a/net/batman-adv/bat_algo.h +++ b/net/batman-adv/bat_algo.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Linus Lüssing * diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index f97e566f0402..de61091af666 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h index 3dc6a7a43eb7..785f6666273c 100644 --- a/net/batman-adv/bat_iv_ogm.h +++ b/net/batman-adv/bat_iv_ogm.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 90e33f84d37a..445594ed58af 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors: * * Linus Lüssing, Marek Lindner * diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h index ec4a2a569750..465a4fc23354 100644 --- a/net/batman-adv/bat_v.h +++ b/net/batman-adv/bat_v.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Linus Lüssing * diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index e8090f099eb8..a9b7919c9de5 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors: * * Linus Lüssing, Marek Lindner * @@ -104,6 +104,9 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo); + /* free the TID stats immediately */ + cfg80211_sinfo_release_content(&sinfo); + dev_put(real_netdev); if (ret == -ENOENT) { /* Node is not associated anymore! It would be diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h index e8c7b7fd290d..75f189ee4a1c 100644 --- a/net/batman-adv/bat_v_elp.h +++ b/net/batman-adv/bat_v_elp.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors: * * Linus Lüssing, Marek Lindner * diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 2948b41b06d4..c9698ad41854 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors: * * Antonio Quartulli * diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h index e5be14c908c6..f67cf7ee06b2 100644 --- a/net/batman-adv/bat_v_ogm.h +++ b/net/batman-adv/bat_v_ogm.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors: * * Antonio Quartulli * diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index a296a4d851f5..63e134e763e3 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2019 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h index 48f683289531..f3a05ad9afad 100644 --- a/net/batman-adv/bitarray.h +++ b/net/batman-adv/bitarray.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2019 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 5fdde2947802..ef39aabdb694 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors: * * Simon Wunderlich * diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 71f95a3e4d3f..31771c751efb 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors: * * Simon Wunderlich * diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index d4a7702e48d8..3b9d1ad2f467 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index 8de018e5c577..c0b8694041ec 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index b9ffe1826527..310a4f353008 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors: * * Antonio Quartulli * @@ -19,6 +19,7 @@ #include "distributed-arp-table.h" #include "main.h" +#include <asm/unaligned.h> #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/byteorder/generic.h> @@ -29,6 +30,7 @@ #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/in.h> +#include <linux/ip.h> #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/kref.h> @@ -42,6 +44,7 @@ #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> +#include <linux/udp.h> #include <linux/workqueue.h> #include <net/arp.h> #include <net/genetlink.h> @@ -60,6 +63,49 @@ #include "translation-table.h" #include "tvlv.h" +enum batadv_bootpop { + BATADV_BOOTREPLY = 2, +}; + +enum batadv_boothtype { + BATADV_HTYPE_ETHERNET = 1, +}; + +enum batadv_dhcpoptioncode { + BATADV_DHCP_OPT_PAD = 0, + BATADV_DHCP_OPT_MSG_TYPE = 53, + BATADV_DHCP_OPT_END = 255, +}; + +enum batadv_dhcptype { + BATADV_DHCPACK = 5, +}; + +/* { 99, 130, 83, 99 } */ +#define BATADV_DHCP_MAGIC 1669485411 + +struct batadv_dhcp_packet { + __u8 op; + __u8 htype; + __u8 hlen; + __u8 hops; + __be32 xid; + __be16 secs; + __be16 flags; + __be32 ciaddr; + __be32 yiaddr; + __be32 siaddr; + __be32 giaddr; + __u8 chaddr[16]; + __u8 sname[64]; + __u8 file[128]; + __be32 magic; + __u8 options[0]; +}; + +#define BATADV_DHCP_YIADDR_LEN sizeof(((struct batadv_dhcp_packet *)0)->yiaddr) +#define BATADV_DHCP_CHADDR_LEN sizeof(((struct batadv_dhcp_packet *)0)->chaddr) + static void batadv_dat_purge(struct work_struct *work); /** @@ -1440,6 +1486,361 @@ out: } /** + * batadv_dat_check_dhcp_ipudp() - check skb for IP+UDP headers valid for DHCP + * @skb: the packet to check + * @ip_src: a buffer to store the IPv4 source address in + * + * Checks whether the given skb has an IP and UDP header valid for a DHCP + * message from a DHCP server. And if so, stores the IPv4 source address in + * the provided buffer. + * + * Return: True if valid, false otherwise. + */ +static bool +batadv_dat_check_dhcp_ipudp(struct sk_buff *skb, __be32 *ip_src) +{ + unsigned int offset = skb_network_offset(skb); + struct udphdr *udphdr, _udphdr; + struct iphdr *iphdr, _iphdr; + + iphdr = skb_header_pointer(skb, offset, sizeof(_iphdr), &_iphdr); + if (!iphdr || iphdr->version != 4 || iphdr->ihl * 4 < sizeof(_iphdr)) + return false; + + if (iphdr->protocol != IPPROTO_UDP) + return false; + + offset += iphdr->ihl * 4; + skb_set_transport_header(skb, offset); + + udphdr = skb_header_pointer(skb, offset, sizeof(_udphdr), &_udphdr); + if (!udphdr || udphdr->source != htons(67)) + return false; + + *ip_src = get_unaligned(&iphdr->saddr); + + return true; +} + +/** + * batadv_dat_check_dhcp() - examine packet for valid DHCP message + * @skb: the packet to check + * @proto: ethernet protocol hint (behind a potential vlan) + * @ip_src: a buffer to store the IPv4 source address in + * + * Checks whether the given skb is a valid DHCP packet. And if so, stores the + * IPv4 source address in the provided buffer. + * + * Caller needs to ensure that the skb network header is set correctly. + * + * Return: If skb is a valid DHCP packet, then returns its op code + * (e.g. BOOTREPLY vs. BOOTREQUEST). Otherwise returns -EINVAL. + */ +static int +batadv_dat_check_dhcp(struct sk_buff *skb, __be16 proto, __be32 *ip_src) +{ + __be32 *magic, _magic; + unsigned int offset; + struct { + __u8 op; + __u8 htype; + __u8 hlen; + __u8 hops; + } *dhcp_h, _dhcp_h; + + if (proto != htons(ETH_P_IP)) + return -EINVAL; + + if (!batadv_dat_check_dhcp_ipudp(skb, ip_src)) + return -EINVAL; + + offset = skb_transport_offset(skb) + sizeof(struct udphdr); + if (skb->len < offset + sizeof(struct batadv_dhcp_packet)) + return -EINVAL; + + dhcp_h = skb_header_pointer(skb, offset, sizeof(_dhcp_h), &_dhcp_h); + if (!dhcp_h || dhcp_h->htype != BATADV_HTYPE_ETHERNET || + dhcp_h->hlen != ETH_ALEN) + return -EINVAL; + + offset += offsetof(struct batadv_dhcp_packet, magic); + + magic = skb_header_pointer(skb, offset, sizeof(_magic), &_magic); + if (!magic || get_unaligned(magic) != htonl(BATADV_DHCP_MAGIC)) + return -EINVAL; + + return dhcp_h->op; +} + +/** + * batadv_dat_get_dhcp_message_type() - get message type of a DHCP packet + * @skb: the DHCP packet to parse + * + * Iterates over the DHCP options of the given DHCP packet to find a + * DHCP Message Type option and parse it. + * + * Caller needs to ensure that the given skb is a valid DHCP packet and + * that the skb transport header is set correctly. + * + * Return: The found DHCP message type value, if found. -EINVAL otherwise. + */ +static int batadv_dat_get_dhcp_message_type(struct sk_buff *skb) +{ + unsigned int offset = skb_transport_offset(skb) + sizeof(struct udphdr); + u8 *type, _type; + struct { + u8 type; + u8 len; + } *tl, _tl; + + offset += sizeof(struct batadv_dhcp_packet); + + while ((tl = skb_header_pointer(skb, offset, sizeof(_tl), &_tl))) { + if (tl->type == BATADV_DHCP_OPT_MSG_TYPE) + break; + + if (tl->type == BATADV_DHCP_OPT_END) + break; + + if (tl->type == BATADV_DHCP_OPT_PAD) + offset++; + else + offset += tl->len + sizeof(_tl); + } + + /* Option Overload Code not supported */ + if (!tl || tl->type != BATADV_DHCP_OPT_MSG_TYPE || + tl->len != sizeof(_type)) + return -EINVAL; + + offset += sizeof(_tl); + + type = skb_header_pointer(skb, offset, sizeof(_type), &_type); + if (!type) + return -EINVAL; + + return *type; +} + +/** + * batadv_dat_get_dhcp_yiaddr() - get yiaddr from a DHCP packet + * @skb: the DHCP packet to parse + * @buf: a buffer to store the yiaddr in + * + * Caller needs to ensure that the given skb is a valid DHCP packet and + * that the skb transport header is set correctly. + * + * Return: True on success, false otherwise. + */ +static bool batadv_dat_dhcp_get_yiaddr(struct sk_buff *skb, __be32 *buf) +{ + unsigned int offset = skb_transport_offset(skb) + sizeof(struct udphdr); + __be32 *yiaddr; + + offset += offsetof(struct batadv_dhcp_packet, yiaddr); + yiaddr = skb_header_pointer(skb, offset, BATADV_DHCP_YIADDR_LEN, buf); + + if (!yiaddr) + return false; + + if (yiaddr != buf) + *buf = get_unaligned(yiaddr); + + return true; +} + +/** + * batadv_dat_get_dhcp_chaddr() - get chaddr from a DHCP packet + * @skb: the DHCP packet to parse + * @buf: a buffer to store the chaddr in + * + * Caller needs to ensure that the given skb is a valid DHCP packet and + * that the skb transport header is set correctly. + * + * Return: True on success, false otherwise + */ +static bool batadv_dat_get_dhcp_chaddr(struct sk_buff *skb, u8 *buf) +{ + unsigned int offset = skb_transport_offset(skb) + sizeof(struct udphdr); + u8 *chaddr; + + offset += offsetof(struct batadv_dhcp_packet, chaddr); + chaddr = skb_header_pointer(skb, offset, BATADV_DHCP_CHADDR_LEN, buf); + + if (!chaddr) + return false; + + if (chaddr != buf) + memcpy(buf, chaddr, BATADV_DHCP_CHADDR_LEN); + + return true; +} + +/** + * batadv_dat_put_dhcp() - puts addresses from a DHCP packet into the DHT and + * DAT cache + * @bat_priv: the bat priv with all the soft interface information + * @chaddr: the DHCP client MAC address + * @yiaddr: the DHCP client IP address + * @hw_dst: the DHCP server MAC address + * @ip_dst: the DHCP server IP address + * @vid: VLAN identifier + * + * Adds given MAC/IP pairs to the local DAT cache and propagates them further + * into the DHT. + * + * For the DHT propagation, client MAC + IP will appear as the ARP Reply + * transmitter (and hw_dst/ip_dst as the target). + */ +static void batadv_dat_put_dhcp(struct batadv_priv *bat_priv, u8 *chaddr, + __be32 yiaddr, u8 *hw_dst, __be32 ip_dst, + unsigned short vid) +{ + struct sk_buff *skb; + + skb = batadv_dat_arp_create_reply(bat_priv, yiaddr, ip_dst, chaddr, + hw_dst, vid); + if (!skb) + return; + + skb_set_network_header(skb, ETH_HLEN); + + batadv_dat_entry_add(bat_priv, yiaddr, chaddr, vid); + batadv_dat_entry_add(bat_priv, ip_dst, hw_dst, vid); + + batadv_dat_send_data(bat_priv, skb, yiaddr, vid, BATADV_P_DAT_DHT_PUT); + batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT); + + consume_skb(skb); + + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "Snooped from outgoing DHCPACK (server address): %pI4, %pM (vid: %i)\n", + &ip_dst, hw_dst, batadv_print_vid(vid)); + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "Snooped from outgoing DHCPACK (client address): %pI4, %pM (vid: %i)\n", + &yiaddr, chaddr, batadv_print_vid(vid)); +} + +/** + * batadv_dat_check_dhcp_ack() - examine packet for valid DHCP message + * @skb: the packet to check + * @proto: ethernet protocol hint (behind a potential vlan) + * @ip_src: a buffer to store the IPv4 source address in + * @chaddr: a buffer to store the DHCP Client Hardware Address in + * @yiaddr: a buffer to store the DHCP Your IP Address in + * + * Checks whether the given skb is a valid DHCPACK. And if so, stores the + * IPv4 server source address (ip_src), client MAC address (chaddr) and client + * IPv4 address (yiaddr) in the provided buffers. + * + * Caller needs to ensure that the skb network header is set correctly. + * + * Return: True if the skb is a valid DHCPACK. False otherwise. + */ +static bool +batadv_dat_check_dhcp_ack(struct sk_buff *skb, __be16 proto, __be32 *ip_src, + u8 *chaddr, __be32 *yiaddr) +{ + int type; + + type = batadv_dat_check_dhcp(skb, proto, ip_src); + if (type != BATADV_BOOTREPLY) + return false; + + type = batadv_dat_get_dhcp_message_type(skb); + if (type != BATADV_DHCPACK) + return false; + + if (!batadv_dat_dhcp_get_yiaddr(skb, yiaddr)) + return false; + + if (!batadv_dat_get_dhcp_chaddr(skb, chaddr)) + return false; + + return true; +} + +/** + * batadv_dat_snoop_outgoing_dhcp_ack() - snoop DHCPACK and fill DAT with it + * @bat_priv: the bat priv with all the soft interface information + * @skb: the packet to snoop + * @proto: ethernet protocol hint (behind a potential vlan) + * @vid: VLAN identifier + * + * This function first checks whether the given skb is a valid DHCPACK. If + * so then its source MAC and IP as well as its DHCP Client Hardware Address + * field and DHCP Your IP Address field are added to the local DAT cache and + * propagated into the DHT. + * + * Caller needs to ensure that the skb mac and network headers are set + * correctly. + */ +void batadv_dat_snoop_outgoing_dhcp_ack(struct batadv_priv *bat_priv, + struct sk_buff *skb, + __be16 proto, + unsigned short vid) +{ + u8 chaddr[BATADV_DHCP_CHADDR_LEN]; + __be32 ip_src, yiaddr; + + if (!atomic_read(&bat_priv->distributed_arp_table)) + return; + + if (!batadv_dat_check_dhcp_ack(skb, proto, &ip_src, chaddr, &yiaddr)) + return; + + batadv_dat_put_dhcp(bat_priv, chaddr, yiaddr, eth_hdr(skb)->h_source, + ip_src, vid); +} + +/** + * batadv_dat_snoop_incoming_dhcp_ack() - snoop DHCPACK and fill DAT cache + * @bat_priv: the bat priv with all the soft interface information + * @skb: the packet to snoop + * @hdr_size: header size, up to the tail of the batman-adv header + * + * This function first checks whether the given skb is a valid DHCPACK. If + * so then its source MAC and IP as well as its DHCP Client Hardware Address + * field and DHCP Your IP Address field are added to the local DAT cache. + */ +void batadv_dat_snoop_incoming_dhcp_ack(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) +{ + u8 chaddr[BATADV_DHCP_CHADDR_LEN]; + struct ethhdr *ethhdr; + __be32 ip_src, yiaddr; + unsigned short vid; + __be16 proto; + u8 *hw_src; + + if (!atomic_read(&bat_priv->distributed_arp_table)) + return; + + if (unlikely(!pskb_may_pull(skb, hdr_size + ETH_HLEN))) + return; + + ethhdr = (struct ethhdr *)(skb->data + hdr_size); + skb_set_network_header(skb, hdr_size + ETH_HLEN); + proto = ethhdr->h_proto; + + if (!batadv_dat_check_dhcp_ack(skb, proto, &ip_src, chaddr, &yiaddr)) + return; + + hw_src = ethhdr->h_source; + vid = batadv_dat_get_vid(skb, &hdr_size); + + batadv_dat_entry_add(bat_priv, yiaddr, chaddr, vid); + batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid); + + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "Snooped from incoming DHCPACK (server address): %pI4, %pM (vid: %i)\n", + &ip_src, hw_src, batadv_print_vid(vid)); + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "Snooped from incoming DHCPACK (client address): %pI4, %pM (vid: %i)\n", + &yiaddr, chaddr, batadv_print_vid(vid)); +} + +/** * batadv_dat_drop_broadcast_packet() - check if an ARP request has to be * dropped (because the node has already obtained the reply via DAT) or not * @bat_priv: the bat priv with all the soft interface information diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h index a04596028337..68c0ff321acd 100644 --- a/net/batman-adv/distributed-arp-table.h +++ b/net/batman-adv/distributed-arp-table.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors: * * Antonio Quartulli * @@ -46,6 +46,12 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, struct sk_buff *skb); bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size); +void batadv_dat_snoop_outgoing_dhcp_ack(struct batadv_priv *bat_priv, + struct sk_buff *skb, + __be16 proto, + unsigned short vid); +void batadv_dat_snoop_incoming_dhcp_ack(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size); bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet); @@ -140,6 +146,19 @@ batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, return false; } +static inline void +batadv_dat_snoop_outgoing_dhcp_ack(struct batadv_priv *bat_priv, + struct sk_buff *skb, __be16 proto, + unsigned short vid) +{ +} + +static inline void +batadv_dat_snoop_incoming_dhcp_ack(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) +{ +} + static inline bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 5b71a289d04f..b506d15b8230 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors: * * Martin Hundebøll <martin@hundeboll.net> * diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h index 944512e07782..abdac26579bf 100644 --- a/net/batman-adv/fragmentation.h +++ b/net/batman-adv/fragmentation.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2013-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2013-2019 B.A.T.M.A.N. contributors: * * Martin Hundebøll <martin@hundeboll.net> * diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 9d8e5eda2314..f5811f61aa92 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2019 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -47,7 +47,6 @@ #include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> -#include "gateway_common.h" #include "hard-interface.h" #include "log.h" #include "netlink.h" diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index f0b86fcb2493..b5732c8be81a 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2019 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 936c107f3199..e064de45e22c 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2019 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -28,6 +28,7 @@ #include <linux/stddef.h> #include <linux/string.h> #include <uapi/linux/batadv_packet.h> +#include <uapi/linux/batman_adv.h> #include "gateway_client.h" #include "log.h" diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h index 80afb2793687..128467a0fb89 100644 --- a/net/batman-adv/gateway_common.h +++ b/net/batman-adv/gateway_common.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2019 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -25,12 +25,6 @@ struct net_device; -enum batadv_gw_modes { - BATADV_GW_MODE_OFF, - BATADV_GW_MODE_CLIENT, - BATADV_GW_MODE_SERVER, -}; - /** * enum batadv_bandwidth_units - bandwidth unit types */ diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 508f4416dfc9..96ef7c70b4d9 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -20,7 +20,6 @@ #include "main.h" #include <linux/atomic.h> -#include <linux/bug.h> #include <linux/byteorder/generic.h> #include <linux/errno.h> #include <linux/gfp.h> @@ -179,8 +178,10 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) parent_dev = __dev_get_by_index((struct net *)parent_net, dev_get_iflink(net_dev)); /* if we got a NULL parent_dev there is something broken.. */ - if (WARN(!parent_dev, "Cannot find parent device")) + if (!parent_dev) { + pr_err("Cannot find parent device\n"); return false; + } if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net)) return false; diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index d1c0f6189301..48de28c83401 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index 9194f4d891b1..56a08ce193d5 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2019 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index 0e36fa1c7c3e..37507b6d4006 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2006-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2006-2019 B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner * diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 6d5859714f52..9859ababb82e 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index 958be22beda9..5f8926522ff0 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index 75f602e1ce94..3e610df8debf 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h index 35f4f397ed57..660e9bcc85a2 100644 --- a/net/batman-adv/log.h +++ b/net/batman-adv/log.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index d1ed839fd32b..75750870cf04 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index b572066325e4..3ed669d7dc6b 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -25,7 +25,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2019.0" +#define BATADV_SOURCE_VERSION "2019.1" #endif /* B.A.T.M.A.N. parameters */ diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 1dd70f048e7b..f91b1b6265cf 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2014-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2014-2019 B.A.T.M.A.N. contributors: * * Linus Lüssing * diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index 3b04ab13f0eb..466013fe88af 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2014-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2014-2019 B.A.T.M.A.N. contributors: * * Linus Lüssing * diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 2dc3304cee54..67a58da2e6a0 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2016-2019 B.A.T.M.A.N. contributors: * * Matthias Schiffer * @@ -20,13 +20,17 @@ #include "main.h" #include <linux/atomic.h> +#include <linux/bitops.h> +#include <linux/bug.h> #include <linux/byteorder/generic.h> #include <linux/cache.h> +#include <linux/err.h> #include <linux/errno.h> #include <linux/export.h> #include <linux/genetlink.h> #include <linux/gfp.h> #include <linux/if_ether.h> +#include <linux/if_vlan.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/list.h> @@ -47,21 +51,54 @@ #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" #include "gateway_client.h" +#include "gateway_common.h" #include "hard-interface.h" +#include "log.h" #include "multicast.h" +#include "network-coding.h" #include "originator.h" #include "soft-interface.h" #include "tp_meter.h" #include "translation-table.h" +struct net; + struct genl_family batadv_netlink_family; /* multicast groups */ enum batadv_netlink_multicast_groups { + BATADV_NL_MCGRP_CONFIG, BATADV_NL_MCGRP_TPMETER, }; +/** + * enum batadv_genl_ops_flags - flags for genl_ops's internal_flags + */ +enum batadv_genl_ops_flags { + /** + * @BATADV_FLAG_NEED_MESH: request requires valid soft interface in + * attribute BATADV_ATTR_MESH_IFINDEX and expects a pointer to it to be + * saved in info->user_ptr[0] + */ + BATADV_FLAG_NEED_MESH = BIT(0), + + /** + * @BATADV_FLAG_NEED_HARDIF: request requires valid hard interface in + * attribute BATADV_ATTR_HARD_IFINDEX and expects a pointer to it to be + * saved in info->user_ptr[1] + */ + BATADV_FLAG_NEED_HARDIF = BIT(1), + + /** + * @BATADV_FLAG_NEED_VLAN: request requires valid vlan in + * attribute BATADV_ATTR_VLANID and expects a pointer to it to be + * saved in info->user_ptr[1] + */ + BATADV_FLAG_NEED_VLAN = BIT(2), +}; + static const struct genl_multicast_group batadv_netlink_mcgrps[] = { + [BATADV_NL_MCGRP_CONFIG] = { .name = BATADV_NL_MCAST_GROUP_CONFIG }, [BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER }, }; @@ -104,6 +141,26 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_DAT_CACHE_VID] = { .type = NLA_U16 }, [BATADV_ATTR_MCAST_FLAGS] = { .type = NLA_U32 }, [BATADV_ATTR_MCAST_FLAGS_PRIV] = { .type = NLA_U32 }, + [BATADV_ATTR_VLANID] = { .type = NLA_U16 }, + [BATADV_ATTR_AGGREGATED_OGMS_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_AP_ISOLATION_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_ISOLATION_MARK] = { .type = NLA_U32 }, + [BATADV_ATTR_ISOLATION_MASK] = { .type = NLA_U32 }, + [BATADV_ATTR_BONDING_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_FRAGMENTATION_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_GW_BANDWIDTH_DOWN] = { .type = NLA_U32 }, + [BATADV_ATTR_GW_BANDWIDTH_UP] = { .type = NLA_U32 }, + [BATADV_ATTR_GW_MODE] = { .type = NLA_U8 }, + [BATADV_ATTR_GW_SEL_CLASS] = { .type = NLA_U32 }, + [BATADV_ATTR_HOP_PENALTY] = { .type = NLA_U8 }, + [BATADV_ATTR_LOG_LEVEL] = { .type = NLA_U32 }, + [BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_NETWORK_CODING_ENABLED] = { .type = NLA_U8 }, + [BATADV_ATTR_ORIG_INTERVAL] = { .type = NLA_U32 }, + [BATADV_ATTR_ELP_INTERVAL] = { .type = NLA_U32 }, + [BATADV_ATTR_THROUGHPUT_OVERRIDE] = { .type = NLA_U32 }, }; /** @@ -122,20 +179,75 @@ batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype) } /** - * batadv_netlink_mesh_info_put() - fill in generic information about mesh - * interface - * @msg: netlink message to be sent back - * @soft_iface: interface for which the data should be taken + * batadv_netlink_mesh_fill_ap_isolation() - Add ap_isolation softif attribute + * @msg: Netlink message to dump into + * @bat_priv: the bat priv with all the soft interface information * - * Return: 0 on success, < 0 on error + * Return: 0 on success or negative error number in case of failure */ -static int -batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) +static int batadv_netlink_mesh_fill_ap_isolation(struct sk_buff *msg, + struct batadv_priv *bat_priv) +{ + struct batadv_softif_vlan *vlan; + u8 ap_isolation; + + vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + if (!vlan) + return 0; + + ap_isolation = atomic_read(&vlan->ap_isolation); + batadv_softif_vlan_put(vlan); + + return nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED, + !!ap_isolation); +} + +/** + * batadv_option_set_ap_isolation() - Set ap_isolation from genl msg + * @attr: parsed BATADV_ATTR_AP_ISOLATION_ENABLED attribute + * @bat_priv: the bat priv with all the soft interface information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_set_mesh_ap_isolation(struct nlattr *attr, + struct batadv_priv *bat_priv) +{ + struct batadv_softif_vlan *vlan; + + vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + if (!vlan) + return -ENOENT; + + atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr)); + batadv_softif_vlan_put(vlan); + + return 0; +} + +/** + * batadv_netlink_mesh_fill() - Fill message with mesh attributes + * @msg: Netlink message to dump into + * @bat_priv: the bat priv with all the soft interface information + * @cmd: type of message to generate + * @portid: Port making netlink request + * @seq: sequence number for message + * @flags: Additional flags for message + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_mesh_fill(struct sk_buff *msg, + struct batadv_priv *bat_priv, + enum batadv_nl_commands cmd, + u32 portid, u32 seq, int flags) { - struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct net_device *soft_iface = bat_priv->soft_iface; struct batadv_hard_iface *primary_if = NULL; struct net_device *hard_iface; - int ret = -ENOBUFS; + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd); + if (!hdr) + return -ENOBUFS; if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION) || nla_put_string(msg, BATADV_ATTR_ALGO_NAME, @@ -146,16 +258,16 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) soft_iface->dev_addr) || nla_put_u8(msg, BATADV_ATTR_TT_TTVN, (u8)atomic_read(&bat_priv->tt.vn))) - goto out; + goto nla_put_failure; #ifdef CONFIG_BATMAN_ADV_BLA if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC, ntohs(bat_priv->bla.claim_dest.group))) - goto out; + goto nla_put_failure; #endif if (batadv_mcast_mesh_info_put(msg, bat_priv)) - goto out; + goto nla_put_failure; primary_if = batadv_primary_if_get_selected(bat_priv); if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) { @@ -167,77 +279,345 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) hard_iface->name) || nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN, hard_iface->dev_addr)) - goto out; + goto nla_put_failure; } - ret = 0; + if (nla_put_u8(msg, BATADV_ATTR_AGGREGATED_OGMS_ENABLED, + !!atomic_read(&bat_priv->aggregated_ogms))) + goto nla_put_failure; + + if (batadv_netlink_mesh_fill_ap_isolation(msg, bat_priv)) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MARK, + bat_priv->isolation_mark)) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MASK, + bat_priv->isolation_mark_mask)) + goto nla_put_failure; + + if (nla_put_u8(msg, BATADV_ATTR_BONDING_ENABLED, + !!atomic_read(&bat_priv->bonding))) + goto nla_put_failure; + +#ifdef CONFIG_BATMAN_ADV_BLA + if (nla_put_u8(msg, BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED, + !!atomic_read(&bat_priv->bridge_loop_avoidance))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_BLA */ + +#ifdef CONFIG_BATMAN_ADV_DAT + if (nla_put_u8(msg, BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED, + !!atomic_read(&bat_priv->distributed_arp_table))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_DAT */ + + if (nla_put_u8(msg, BATADV_ATTR_FRAGMENTATION_ENABLED, + !!atomic_read(&bat_priv->fragmentation))) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_DOWN, + atomic_read(&bat_priv->gw.bandwidth_down))) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_UP, + atomic_read(&bat_priv->gw.bandwidth_up))) + goto nla_put_failure; + + if (nla_put_u8(msg, BATADV_ATTR_GW_MODE, + atomic_read(&bat_priv->gw.mode))) + goto nla_put_failure; + + if (bat_priv->algo_ops->gw.get_best_gw_node && + bat_priv->algo_ops->gw.is_eligible) { + /* GW selection class is not available if the routing algorithm + * in use does not implement the GW API + */ + if (nla_put_u32(msg, BATADV_ATTR_GW_SEL_CLASS, + atomic_read(&bat_priv->gw.sel_class))) + goto nla_put_failure; + } + + if (nla_put_u8(msg, BATADV_ATTR_HOP_PENALTY, + atomic_read(&bat_priv->hop_penalty))) + goto nla_put_failure; + +#ifdef CONFIG_BATMAN_ADV_DEBUG + if (nla_put_u32(msg, BATADV_ATTR_LOG_LEVEL, + atomic_read(&bat_priv->log_level))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_DEBUG */ + +#ifdef CONFIG_BATMAN_ADV_MCAST + if (nla_put_u8(msg, BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED, + !atomic_read(&bat_priv->multicast_mode))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_MCAST */ + +#ifdef CONFIG_BATMAN_ADV_NC + if (nla_put_u8(msg, BATADV_ATTR_NETWORK_CODING_ENABLED, + !!atomic_read(&bat_priv->network_coding))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_NC */ + + if (nla_put_u32(msg, BATADV_ATTR_ORIG_INTERVAL, + atomic_read(&bat_priv->orig_interval))) + goto nla_put_failure; - out: if (primary_if) batadv_hardif_put(primary_if); - return ret; + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + if (primary_if) + batadv_hardif_put(primary_if); + + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } /** - * batadv_netlink_get_mesh_info() - handle incoming BATADV_CMD_GET_MESH_INFO - * netlink request - * @skb: received netlink message - * @info: receiver information + * batadv_netlink_notify_mesh() - send softif attributes to listener + * @bat_priv: the bat priv with all the soft interface information * * Return: 0 on success, < 0 on error */ -static int -batadv_netlink_get_mesh_info(struct sk_buff *skb, struct genl_info *info) +int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv) { - struct net *net = genl_info_net(info); - struct net_device *soft_iface; - struct sk_buff *msg = NULL; - void *msg_head; - int ifindex; + struct sk_buff *msg; int ret; - if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) - return -EINVAL; - - ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); - if (!ifindex) - return -EINVAL; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; - soft_iface = dev_get_by_index(net, ifindex); - if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { - ret = -ENODEV; - goto out; + ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_SET_MESH, + 0, 0, 0); + if (ret < 0) { + nlmsg_free(msg); + return ret; } + genlmsg_multicast_netns(&batadv_netlink_family, + dev_net(bat_priv->soft_iface), msg, 0, + BATADV_NL_MCGRP_CONFIG, GFP_KERNEL); + + return 0; +} + +/** + * batadv_netlink_get_mesh() - Get softif attributes + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_get_mesh(struct sk_buff *skb, struct genl_info *info) +{ + struct batadv_priv *bat_priv = info->user_ptr[0]; + struct sk_buff *msg; + int ret; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) { - ret = -ENOMEM; - goto out; + if (!msg) + return -ENOMEM; + + ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_GET_MESH, + info->snd_portid, info->snd_seq, 0); + if (ret < 0) { + nlmsg_free(msg); + return ret; } - msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq, - &batadv_netlink_family, 0, - BATADV_CMD_GET_MESH_INFO); - if (!msg_head) { - ret = -ENOBUFS; - goto out; + ret = genlmsg_reply(msg, info); + + return ret; +} + +/** + * batadv_netlink_set_mesh() - Set softif attributes + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info) +{ + struct batadv_priv *bat_priv = info->user_ptr[0]; + struct nlattr *attr; + + if (info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED]) { + attr = info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED]; + + atomic_set(&bat_priv->aggregated_ogms, !!nla_get_u8(attr)); } - ret = batadv_netlink_mesh_info_put(msg, soft_iface); + if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) { + attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]; - out: - if (soft_iface) - dev_put(soft_iface); + batadv_netlink_set_mesh_ap_isolation(attr, bat_priv); + } - if (ret) { - if (msg) - nlmsg_free(msg); - return ret; + if (info->attrs[BATADV_ATTR_ISOLATION_MARK]) { + attr = info->attrs[BATADV_ATTR_ISOLATION_MARK]; + + bat_priv->isolation_mark = nla_get_u32(attr); } - genlmsg_end(msg, msg_head); - return genlmsg_reply(msg, info); + if (info->attrs[BATADV_ATTR_ISOLATION_MASK]) { + attr = info->attrs[BATADV_ATTR_ISOLATION_MASK]; + + bat_priv->isolation_mark_mask = nla_get_u32(attr); + } + + if (info->attrs[BATADV_ATTR_BONDING_ENABLED]) { + attr = info->attrs[BATADV_ATTR_BONDING_ENABLED]; + + atomic_set(&bat_priv->bonding, !!nla_get_u8(attr)); + } + +#ifdef CONFIG_BATMAN_ADV_BLA + if (info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]) { + attr = info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]; + + atomic_set(&bat_priv->bridge_loop_avoidance, + !!nla_get_u8(attr)); + batadv_bla_status_update(bat_priv->soft_iface); + } +#endif /* CONFIG_BATMAN_ADV_BLA */ + +#ifdef CONFIG_BATMAN_ADV_DAT + if (info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]) { + attr = info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]; + + atomic_set(&bat_priv->distributed_arp_table, + !!nla_get_u8(attr)); + batadv_dat_status_update(bat_priv->soft_iface); + } +#endif /* CONFIG_BATMAN_ADV_DAT */ + + if (info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED]) { + attr = info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED]; + + atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr)); + batadv_update_min_mtu(bat_priv->soft_iface); + } + + if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]) { + attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]; + + atomic_set(&bat_priv->gw.bandwidth_down, nla_get_u32(attr)); + batadv_gw_tvlv_container_update(bat_priv); + } + + if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP]) { + attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP]; + + atomic_set(&bat_priv->gw.bandwidth_up, nla_get_u32(attr)); + batadv_gw_tvlv_container_update(bat_priv); + } + + if (info->attrs[BATADV_ATTR_GW_MODE]) { + u8 gw_mode; + + attr = info->attrs[BATADV_ATTR_GW_MODE]; + gw_mode = nla_get_u8(attr); + + if (gw_mode <= BATADV_GW_MODE_SERVER) { + /* Invoking batadv_gw_reselect() is not enough to really + * de-select the current GW. It will only instruct the + * gateway client code to perform a re-election the next + * time that this is needed. + * + * When gw client mode is being switched off the current + * GW must be de-selected explicitly otherwise no GW_ADD + * uevent is thrown on client mode re-activation. This + * is operation is performed in + * batadv_gw_check_client_stop(). + */ + batadv_gw_reselect(bat_priv); + + /* always call batadv_gw_check_client_stop() before + * changing the gateway state + */ + batadv_gw_check_client_stop(bat_priv); + atomic_set(&bat_priv->gw.mode, gw_mode); + batadv_gw_tvlv_container_update(bat_priv); + } + } + + if (info->attrs[BATADV_ATTR_GW_SEL_CLASS] && + bat_priv->algo_ops->gw.get_best_gw_node && + bat_priv->algo_ops->gw.is_eligible) { + /* setting the GW selection class is allowed only if the routing + * algorithm in use implements the GW API + */ + + u32 sel_class_max = 0xffffffffu; + u32 sel_class; + + attr = info->attrs[BATADV_ATTR_GW_SEL_CLASS]; + sel_class = nla_get_u32(attr); + + if (!bat_priv->algo_ops->gw.store_sel_class) + sel_class_max = BATADV_TQ_MAX_VALUE; + + if (sel_class >= 1 && sel_class <= sel_class_max) { + atomic_set(&bat_priv->gw.sel_class, sel_class); + batadv_gw_reselect(bat_priv); + } + } + + if (info->attrs[BATADV_ATTR_HOP_PENALTY]) { + attr = info->attrs[BATADV_ATTR_HOP_PENALTY]; + + atomic_set(&bat_priv->hop_penalty, nla_get_u8(attr)); + } + +#ifdef CONFIG_BATMAN_ADV_DEBUG + if (info->attrs[BATADV_ATTR_LOG_LEVEL]) { + attr = info->attrs[BATADV_ATTR_LOG_LEVEL]; + + atomic_set(&bat_priv->log_level, + nla_get_u32(attr) & BATADV_DBG_ALL); + } +#endif /* CONFIG_BATMAN_ADV_DEBUG */ + +#ifdef CONFIG_BATMAN_ADV_MCAST + if (info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]) { + attr = info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]; + + atomic_set(&bat_priv->multicast_mode, !nla_get_u8(attr)); + } +#endif /* CONFIG_BATMAN_ADV_MCAST */ + +#ifdef CONFIG_BATMAN_ADV_NC + if (info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED]) { + attr = info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED]; + + atomic_set(&bat_priv->network_coding, !!nla_get_u8(attr)); + batadv_nc_status_update(bat_priv->soft_iface); + } +#endif /* CONFIG_BATMAN_ADV_NC */ + + if (info->attrs[BATADV_ATTR_ORIG_INTERVAL]) { + u32 orig_interval; + + attr = info->attrs[BATADV_ATTR_ORIG_INTERVAL]; + orig_interval = nla_get_u32(attr); + + orig_interval = min_t(u32, orig_interval, INT_MAX); + orig_interval = max_t(u32, orig_interval, 2 * BATADV_JITTER); + + atomic_set(&bat_priv->orig_interval, orig_interval); + } + + batadv_netlink_notify_mesh(bat_priv); + + return 0; } /** @@ -329,40 +709,24 @@ err_genlmsg: static int batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info) { - struct net *net = genl_info_net(info); - struct net_device *soft_iface; - struct batadv_priv *bat_priv; + struct batadv_priv *bat_priv = info->user_ptr[0]; struct sk_buff *msg = NULL; u32 test_length; void *msg_head; - int ifindex; u32 cookie; u8 *dst; int ret; - if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) - return -EINVAL; - if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS]) return -EINVAL; if (!info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]) return -EINVAL; - ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); - if (!ifindex) - return -EINVAL; - dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]); test_length = nla_get_u32(info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]); - soft_iface = dev_get_by_index(net, ifindex); - if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { - ret = -ENODEV; - goto out; - } - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; @@ -377,15 +741,11 @@ batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info) goto out; } - bat_priv = netdev_priv(soft_iface); batadv_tp_start(bat_priv, dst, test_length, &cookie); ret = batadv_netlink_tp_meter_put(msg, cookie); out: - if (soft_iface) - dev_put(soft_iface); - if (ret) { if (msg) nlmsg_free(msg); @@ -406,65 +766,53 @@ batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info) static int batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info) { - struct net *net = genl_info_net(info); - struct net_device *soft_iface; - struct batadv_priv *bat_priv; - int ifindex; + struct batadv_priv *bat_priv = info->user_ptr[0]; u8 *dst; int ret = 0; - if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) - return -EINVAL; - if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS]) return -EINVAL; - ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); - if (!ifindex) - return -EINVAL; - dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]); - soft_iface = dev_get_by_index(net, ifindex); - if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { - ret = -ENODEV; - goto out; - } - - bat_priv = netdev_priv(soft_iface); batadv_tp_stop(bat_priv, dst, BATADV_TP_REASON_CANCEL); -out: - if (soft_iface) - dev_put(soft_iface); - return ret; } /** - * batadv_netlink_dump_hardif_entry() - Dump one hard interface into a message + * batadv_netlink_hardif_fill() - Fill message with hardif attributes * @msg: Netlink message to dump into + * @bat_priv: the bat priv with all the soft interface information + * @hard_iface: hard interface which was modified + * @cmd: type of message to generate * @portid: Port making netlink request + * @seq: sequence number for message + * @flags: Additional flags for message * @cb: Control block containing additional options - * @hard_iface: Hard interface to dump * - * Return: error code, or 0 on success + * Return: 0 on success or negative error number in case of failure */ -static int -batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, - struct netlink_callback *cb, - struct batadv_hard_iface *hard_iface) +static int batadv_netlink_hardif_fill(struct sk_buff *msg, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface, + enum batadv_nl_commands cmd, + u32 portid, u32 seq, int flags, + struct netlink_callback *cb) { struct net_device *net_dev = hard_iface->net_dev; void *hdr; - hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq, - &batadv_netlink_family, NLM_F_MULTI, - BATADV_CMD_GET_HARDIFS); + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd); if (!hdr) - return -EMSGSIZE; + return -ENOBUFS; + + if (cb) + genl_dump_check_consistent(cb, hdr); - genl_dump_check_consistent(cb, hdr); + if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, + bat_priv->soft_iface->ifindex)) + goto nla_put_failure; if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, net_dev->ifindex) || @@ -479,27 +827,137 @@ batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, goto nla_put_failure; } +#ifdef CONFIG_BATMAN_ADV_BATMAN_V + if (nla_put_u32(msg, BATADV_ATTR_ELP_INTERVAL, + atomic_read(&hard_iface->bat_v.elp_interval))) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT_OVERRIDE, + atomic_read(&hard_iface->bat_v.throughput_override))) + goto nla_put_failure; +#endif /* CONFIG_BATMAN_ADV_BATMAN_V */ + genlmsg_end(msg, hdr); return 0; - nla_put_failure: +nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } /** - * batadv_netlink_dump_hardifs() - Dump all hard interface into a messages + * batadv_netlink_notify_hardif() - send hardif attributes to listener + * @bat_priv: the bat priv with all the soft interface information + * @hard_iface: hard interface which was modified + * + * Return: 0 on success, < 0 on error + */ +int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface) +{ + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface, + BATADV_CMD_SET_HARDIF, 0, 0, 0, NULL); + if (ret < 0) { + nlmsg_free(msg); + return ret; + } + + genlmsg_multicast_netns(&batadv_netlink_family, + dev_net(bat_priv->soft_iface), msg, 0, + BATADV_NL_MCGRP_CONFIG, GFP_KERNEL); + + return 0; +} + +/** + * batadv_netlink_get_hardif() - Get hardif attributes + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_get_hardif(struct sk_buff *skb, + struct genl_info *info) +{ + struct batadv_hard_iface *hard_iface = info->user_ptr[1]; + struct batadv_priv *bat_priv = info->user_ptr[0]; + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface, + BATADV_CMD_GET_HARDIF, + info->snd_portid, info->snd_seq, 0, + NULL); + if (ret < 0) { + nlmsg_free(msg); + return ret; + } + + ret = genlmsg_reply(msg, info); + + return ret; +} + +/** + * batadv_netlink_set_hardif() - Set hardif attributes + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_set_hardif(struct sk_buff *skb, + struct genl_info *info) +{ + struct batadv_hard_iface *hard_iface = info->user_ptr[1]; + struct batadv_priv *bat_priv = info->user_ptr[0]; + +#ifdef CONFIG_BATMAN_ADV_BATMAN_V + struct nlattr *attr; + + if (info->attrs[BATADV_ATTR_ELP_INTERVAL]) { + attr = info->attrs[BATADV_ATTR_ELP_INTERVAL]; + + atomic_set(&hard_iface->bat_v.elp_interval, nla_get_u32(attr)); + } + + if (info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE]) { + attr = info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE]; + + atomic_set(&hard_iface->bat_v.throughput_override, + nla_get_u32(attr)); + } +#endif /* CONFIG_BATMAN_ADV_BATMAN_V */ + + batadv_netlink_notify_hardif(bat_priv, hard_iface); + + return 0; +} + +/** + * batadv_netlink_dump_hardif() - Dump all hard interface into a messages * @msg: Netlink message to dump into * @cb: Parameters from query * * Return: error code, or length of reply message on success */ static int -batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) +batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb) { struct net *net = sock_net(cb->skb->sk); struct net_device *soft_iface; struct batadv_hard_iface *hard_iface; + struct batadv_priv *bat_priv; int ifindex; int portid = NETLINK_CB(cb->skb).portid; int skip = cb->args[0]; @@ -519,6 +977,8 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) return -ENODEV; } + bat_priv = netdev_priv(soft_iface); + rtnl_lock(); cb->seq = batadv_hardif_generation << 1 | 1; @@ -529,8 +989,10 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) if (i++ < skip) continue; - if (batadv_netlink_dump_hardif_entry(msg, portid, cb, - hard_iface)) { + if (batadv_netlink_hardif_fill(msg, bat_priv, hard_iface, + BATADV_CMD_GET_HARDIF, + portid, cb->nlh->nlmsg_seq, + NLM_F_MULTI, cb)) { i--; break; } @@ -545,24 +1007,361 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) return msg->len; } +/** + * batadv_netlink_vlan_fill() - Fill message with vlan attributes + * @msg: Netlink message to dump into + * @bat_priv: the bat priv with all the soft interface information + * @vlan: vlan which was modified + * @cmd: type of message to generate + * @portid: Port making netlink request + * @seq: sequence number for message + * @flags: Additional flags for message + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_vlan_fill(struct sk_buff *msg, + struct batadv_priv *bat_priv, + struct batadv_softif_vlan *vlan, + enum batadv_nl_commands cmd, + u32 portid, u32 seq, int flags) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd); + if (!hdr) + return -ENOBUFS; + + if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, + bat_priv->soft_iface->ifindex)) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_VLANID, vlan->vid & VLAN_VID_MASK)) + goto nla_put_failure; + + if (nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED, + !!atomic_read(&vlan->ap_isolation))) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_netlink_notify_vlan() - send vlan attributes to listener + * @bat_priv: the bat priv with all the soft interface information + * @vlan: vlan which was modified + * + * Return: 0 on success, < 0 on error + */ +int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv, + struct batadv_softif_vlan *vlan) +{ + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan, + BATADV_CMD_SET_VLAN, 0, 0, 0); + if (ret < 0) { + nlmsg_free(msg); + return ret; + } + + genlmsg_multicast_netns(&batadv_netlink_family, + dev_net(bat_priv->soft_iface), msg, 0, + BATADV_NL_MCGRP_CONFIG, GFP_KERNEL); + + return 0; +} + +/** + * batadv_netlink_get_vlan() - Get vlan attributes + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_get_vlan(struct sk_buff *skb, struct genl_info *info) +{ + struct batadv_softif_vlan *vlan = info->user_ptr[1]; + struct batadv_priv *bat_priv = info->user_ptr[0]; + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan, BATADV_CMD_GET_VLAN, + info->snd_portid, info->snd_seq, 0); + if (ret < 0) { + nlmsg_free(msg); + return ret; + } + + ret = genlmsg_reply(msg, info); + + return ret; +} + +/** + * batadv_netlink_set_vlan() - Get vlan attributes + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_netlink_set_vlan(struct sk_buff *skb, struct genl_info *info) +{ + struct batadv_softif_vlan *vlan = info->user_ptr[1]; + struct batadv_priv *bat_priv = info->user_ptr[0]; + struct nlattr *attr; + + if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) { + attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]; + + atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr)); + } + + batadv_netlink_notify_vlan(bat_priv, vlan); + + return 0; +} + +/** + * batadv_get_softif_from_info() - Retrieve soft interface from genl attributes + * @net: the applicable net namespace + * @info: receiver information + * + * Return: Pointer to soft interface (with increased refcnt) on success, error + * pointer on error + */ +static struct net_device * +batadv_get_softif_from_info(struct net *net, struct genl_info *info) +{ + struct net_device *soft_iface; + int ifindex; + + if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) + return ERR_PTR(-EINVAL); + + ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface) + return ERR_PTR(-ENODEV); + + if (!batadv_softif_is_valid(soft_iface)) + goto err_put_softif; + + return soft_iface; + +err_put_softif: + dev_put(soft_iface); + + return ERR_PTR(-EINVAL); +} + +/** + * batadv_get_hardif_from_info() - Retrieve hardif from genl attributes + * @bat_priv: the bat priv with all the soft interface information + * @net: the applicable net namespace + * @info: receiver information + * + * Return: Pointer to hard interface (with increased refcnt) on success, error + * pointer on error + */ +static struct batadv_hard_iface * +batadv_get_hardif_from_info(struct batadv_priv *bat_priv, struct net *net, + struct genl_info *info) +{ + struct batadv_hard_iface *hard_iface; + struct net_device *hard_dev; + unsigned int hardif_index; + + if (!info->attrs[BATADV_ATTR_HARD_IFINDEX]) + return ERR_PTR(-EINVAL); + + hardif_index = nla_get_u32(info->attrs[BATADV_ATTR_HARD_IFINDEX]); + + hard_dev = dev_get_by_index(net, hardif_index); + if (!hard_dev) + return ERR_PTR(-ENODEV); + + hard_iface = batadv_hardif_get_by_netdev(hard_dev); + if (!hard_iface) + goto err_put_harddev; + + if (hard_iface->soft_iface != bat_priv->soft_iface) + goto err_put_hardif; + + /* hard_dev is referenced by hard_iface and not needed here */ + dev_put(hard_dev); + + return hard_iface; + +err_put_hardif: + batadv_hardif_put(hard_iface); +err_put_harddev: + dev_put(hard_dev); + + return ERR_PTR(-EINVAL); +} + +/** + * batadv_get_vlan_from_info() - Retrieve vlan from genl attributes + * @bat_priv: the bat priv with all the soft interface information + * @net: the applicable net namespace + * @info: receiver information + * + * Return: Pointer to vlan on success (with increased refcnt), error pointer + * on error + */ +static struct batadv_softif_vlan * +batadv_get_vlan_from_info(struct batadv_priv *bat_priv, struct net *net, + struct genl_info *info) +{ + struct batadv_softif_vlan *vlan; + u16 vid; + + if (!info->attrs[BATADV_ATTR_VLANID]) + return ERR_PTR(-EINVAL); + + vid = nla_get_u16(info->attrs[BATADV_ATTR_VLANID]); + + vlan = batadv_softif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG); + if (!vlan) + return ERR_PTR(-ENOENT); + + return vlan; +} + +/** + * batadv_pre_doit() - Prepare batman-adv genl doit request + * @ops: requested netlink operation + * @skb: Netlink message with request data + * @info: receiver information + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct batadv_hard_iface *hard_iface; + struct batadv_priv *bat_priv = NULL; + struct batadv_softif_vlan *vlan; + struct net_device *soft_iface; + u8 user_ptr1_flags; + u8 mesh_dep_flags; + int ret; + + user_ptr1_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN; + if (WARN_ON(hweight8(ops->internal_flags & user_ptr1_flags) > 1)) + return -EINVAL; + + mesh_dep_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN; + if (WARN_ON((ops->internal_flags & mesh_dep_flags) && + (~ops->internal_flags & BATADV_FLAG_NEED_MESH))) + return -EINVAL; + + if (ops->internal_flags & BATADV_FLAG_NEED_MESH) { + soft_iface = batadv_get_softif_from_info(net, info); + if (IS_ERR(soft_iface)) + return PTR_ERR(soft_iface); + + bat_priv = netdev_priv(soft_iface); + info->user_ptr[0] = bat_priv; + } + + if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF) { + hard_iface = batadv_get_hardif_from_info(bat_priv, net, info); + if (IS_ERR(hard_iface)) { + ret = PTR_ERR(hard_iface); + goto err_put_softif; + } + + info->user_ptr[1] = hard_iface; + } + + if (ops->internal_flags & BATADV_FLAG_NEED_VLAN) { + vlan = batadv_get_vlan_from_info(bat_priv, net, info); + if (IS_ERR(vlan)) { + ret = PTR_ERR(vlan); + goto err_put_softif; + } + + info->user_ptr[1] = vlan; + } + + return 0; + +err_put_softif: + if (bat_priv) + dev_put(bat_priv->soft_iface); + + return ret; +} + +/** + * batadv_post_doit() - End batman-adv genl doit request + * @ops: requested netlink operation + * @skb: Netlink message with request data + * @info: receiver information + */ +static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb, + struct genl_info *info) +{ + struct batadv_hard_iface *hard_iface; + struct batadv_softif_vlan *vlan; + struct batadv_priv *bat_priv; + + if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF && + info->user_ptr[1]) { + hard_iface = info->user_ptr[1]; + + batadv_hardif_put(hard_iface); + } + + if (ops->internal_flags & BATADV_FLAG_NEED_VLAN && info->user_ptr[1]) { + vlan = info->user_ptr[1]; + batadv_softif_vlan_put(vlan); + } + + if (ops->internal_flags & BATADV_FLAG_NEED_MESH && info->user_ptr[0]) { + bat_priv = info->user_ptr[0]; + dev_put(bat_priv->soft_iface); + } +} + static const struct genl_ops batadv_netlink_ops[] = { { - .cmd = BATADV_CMD_GET_MESH_INFO, - .flags = GENL_ADMIN_PERM, + .cmd = BATADV_CMD_GET_MESH, + /* can be retrieved by unprivileged users */ .policy = batadv_netlink_policy, - .doit = batadv_netlink_get_mesh_info, + .doit = batadv_netlink_get_mesh, + .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER, .flags = GENL_ADMIN_PERM, .policy = batadv_netlink_policy, .doit = batadv_netlink_tp_meter_start, + .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER_CANCEL, .flags = GENL_ADMIN_PERM, .policy = batadv_netlink_policy, .doit = batadv_netlink_tp_meter_cancel, + .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_GET_ROUTING_ALGOS, @@ -571,10 +1370,13 @@ static const struct genl_ops batadv_netlink_ops[] = { .dumpit = batadv_algo_dump, }, { - .cmd = BATADV_CMD_GET_HARDIFS, - .flags = GENL_ADMIN_PERM, + .cmd = BATADV_CMD_GET_HARDIF, + /* can be retrieved by unprivileged users */ .policy = batadv_netlink_policy, - .dumpit = batadv_netlink_dump_hardifs, + .dumpit = batadv_netlink_dump_hardif, + .doit = batadv_netlink_get_hardif, + .internal_flags = BATADV_FLAG_NEED_MESH | + BATADV_FLAG_NEED_HARDIF, }, { .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL, @@ -630,7 +1432,37 @@ static const struct genl_ops batadv_netlink_ops[] = { .policy = batadv_netlink_policy, .dumpit = batadv_mcast_flags_dump, }, - + { + .cmd = BATADV_CMD_SET_MESH, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .doit = batadv_netlink_set_mesh, + .internal_flags = BATADV_FLAG_NEED_MESH, + }, + { + .cmd = BATADV_CMD_SET_HARDIF, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .doit = batadv_netlink_set_hardif, + .internal_flags = BATADV_FLAG_NEED_MESH | + BATADV_FLAG_NEED_HARDIF, + }, + { + .cmd = BATADV_CMD_GET_VLAN, + /* can be retrieved by unprivileged users */ + .policy = batadv_netlink_policy, + .doit = batadv_netlink_get_vlan, + .internal_flags = BATADV_FLAG_NEED_MESH | + BATADV_FLAG_NEED_VLAN, + }, + { + .cmd = BATADV_CMD_SET_VLAN, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .doit = batadv_netlink_set_vlan, + .internal_flags = BATADV_FLAG_NEED_MESH | + BATADV_FLAG_NEED_VLAN, + }, }; struct genl_family batadv_netlink_family __ro_after_init = { @@ -639,6 +1471,8 @@ struct genl_family batadv_netlink_family __ro_after_init = { .version = 1, .maxattr = BATADV_ATTR_MAX, .netnsok = true, + .pre_doit = batadv_pre_doit, + .post_doit = batadv_post_doit, .module = THIS_MODULE, .ops = batadv_netlink_ops, .n_ops = ARRAY_SIZE(batadv_netlink_ops), diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h index 571d9a5ae7aa..7273368544fc 100644 --- a/net/batman-adv/netlink.h +++ b/net/batman-adv/netlink.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2016-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2016-2019 B.A.T.M.A.N. contributors: * * Matthias Schiffer * @@ -34,6 +34,12 @@ int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst, u8 result, u32 test_time, u64 total_bytes, u32 cookie); +int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv); +int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface); +int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv, + struct batadv_softif_vlan *vlan); + extern struct genl_family batadv_netlink_family; #endif /* _NET_BATMAN_ADV_NETLINK_H_ */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 34caf129a9bf..278762bd94c6 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2012-2019 B.A.T.M.A.N. contributors: * * Martin Hundebøll, Jeppe Ledet-Pedersen * diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index 65c346812bc1..96ef0a511fc7 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2012-2019 B.A.T.M.A.N. contributors: * * Martin Hundebøll, Jeppe Ledet-Pedersen * diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 56a981af5c92..e5cdf89ef63c 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2009-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2009-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index a8b4c7b667ec..dca1e4a34ec6 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index cc3ed93a6d51..cae0e5dd0768 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -1043,6 +1043,8 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, hdr_size)) goto rx_success; + batadv_dat_snoop_incoming_dhcp_ack(bat_priv, skb, hdr_size); + batadv_interface_rx(recv_if->soft_iface, skb, hdr_size, orig_node); @@ -1278,6 +1280,8 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, if (batadv_dat_snoop_incoming_arp_reply(bat_priv, skb, hdr_size)) goto rx_success; + batadv_dat_snoop_incoming_dhcp_ack(bat_priv, skb, hdr_size); + /* broadcast for me */ batadv_interface_rx(recv_if->soft_iface, skb, hdr_size, orig_node); diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index db54c2d9b8bf..0102d69d345c 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 4a35f5c2f52b..66a8b3e44501 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 64cce07b8fe6..1f6132922e60 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 5db5a0a4c959..2e367230376b 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@ -50,13 +50,13 @@ #include <linux/string.h> #include <linux/types.h> #include <uapi/linux/batadv_packet.h> +#include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "bridge_loop_avoidance.h" #include "debugfs.h" #include "distributed-arp-table.h" #include "gateway_client.h" -#include "gateway_common.h" #include "hard-interface.h" #include "multicast.h" #include "network-coding.h" @@ -212,6 +212,7 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, enum batadv_forw_mode forw_mode; struct batadv_orig_node *mcast_single_orig = NULL; int network_offset = ETH_HLEN; + __be16 proto; if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; @@ -221,14 +222,21 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, netif_trans_update(soft_iface); vid = batadv_get_vid(skb, 0); + + skb_reset_mac_header(skb); ethhdr = eth_hdr(skb); - switch (ntohs(ethhdr->h_proto)) { + proto = ethhdr->h_proto; + + switch (ntohs(proto)) { case ETH_P_8021Q: + if (!pskb_may_pull(skb, sizeof(*vhdr))) + goto dropped; vhdr = vlan_eth_hdr(skb); + proto = vhdr->h_vlan_encapsulated_proto; /* drop batman-in-batman packets to prevent loops */ - if (vhdr->h_vlan_encapsulated_proto != htons(ETH_P_BATMAN)) { + if (proto != htons(ETH_P_BATMAN)) { network_offset += VLAN_HLEN; break; } @@ -256,6 +264,9 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, goto dropped; } + /* Snoop address candidates from DHCPACKs for early DAT filling */ + batadv_dat_snoop_outgoing_dhcp_ack(bat_priv, skb, proto, vid); + /* don't accept stp packets. STP does not help in meshes. * better use the bridge loop avoidance ... * diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index daf87f07fadd..538bb661878c 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 09427fc6494a..0b4b3fb778a6 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors: * * Marek Lindner * @@ -40,6 +40,7 @@ #include <linux/stringify.h> #include <linux/workqueue.h> #include <uapi/linux/batadv_packet.h> +#include <uapi/linux/batman_adv.h> #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" @@ -47,6 +48,7 @@ #include "gateway_common.h" #include "hard-interface.h" #include "log.h" +#include "netlink.h" #include "network-coding.h" #include "soft-interface.h" @@ -153,9 +155,14 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ { \ struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ struct batadv_priv *bat_priv = netdev_priv(net_dev); \ + ssize_t length; \ + \ + length = __batadv_store_bool_attr(buff, count, _post_func, attr,\ + &bat_priv->_name, net_dev); \ \ - return __batadv_store_bool_attr(buff, count, _post_func, attr, \ - &bat_priv->_name, net_dev); \ + batadv_netlink_notify_mesh(bat_priv); \ + \ + return length; \ } #define BATADV_ATTR_SIF_SHOW_BOOL(_name) \ @@ -185,11 +192,16 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ { \ struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ struct batadv_priv *bat_priv = netdev_priv(net_dev); \ + ssize_t length; \ \ - return __batadv_store_uint_attr(buff, count, _min, _max, \ - _post_func, attr, \ - &bat_priv->_var, net_dev, \ - NULL); \ + length = __batadv_store_uint_attr(buff, count, _min, _max, \ + _post_func, attr, \ + &bat_priv->_var, net_dev, \ + NULL); \ + \ + batadv_netlink_notify_mesh(bat_priv); \ + \ + return length; \ } #define BATADV_ATTR_SIF_SHOW_UINT(_name, _var) \ @@ -222,6 +234,11 @@ ssize_t batadv_store_vlan_##_name(struct kobject *kobj, \ attr, &vlan->_name, \ bat_priv->soft_iface); \ \ + if (vlan->vid) \ + batadv_netlink_notify_vlan(bat_priv, vlan); \ + else \ + batadv_netlink_notify_mesh(bat_priv); \ + \ batadv_softif_vlan_put(vlan); \ return res; \ } @@ -255,6 +272,7 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ { \ struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ struct batadv_hard_iface *hard_iface; \ + struct batadv_priv *bat_priv; \ ssize_t length; \ \ hard_iface = batadv_hardif_get_by_netdev(net_dev); \ @@ -267,6 +285,11 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ hard_iface->soft_iface, \ net_dev); \ \ + if (hard_iface->soft_iface) { \ + bat_priv = netdev_priv(hard_iface->soft_iface); \ + batadv_netlink_notify_hardif(bat_priv, hard_iface); \ + } \ + \ batadv_hardif_put(hard_iface); \ return length; \ } @@ -536,6 +559,9 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, batadv_gw_check_client_stop(bat_priv); atomic_set(&bat_priv->gw.mode, (unsigned int)gw_mode_tmp); batadv_gw_tvlv_container_update(bat_priv); + + batadv_netlink_notify_mesh(bat_priv); + return count; } @@ -562,6 +588,7 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj, size_t count) { struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + ssize_t length; /* setting the GW selection class is allowed only if the routing * algorithm in use implements the GW API @@ -577,10 +604,14 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj, return bat_priv->algo_ops->gw.store_sel_class(bat_priv, buff, count); - return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE, - batadv_post_gw_reselect, attr, - &bat_priv->gw.sel_class, - bat_priv->soft_iface, NULL); + length = __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE, + batadv_post_gw_reselect, attr, + &bat_priv->gw.sel_class, + bat_priv->soft_iface, NULL); + + batadv_netlink_notify_mesh(bat_priv); + + return length; } static ssize_t batadv_show_gw_bwidth(struct kobject *kobj, @@ -600,12 +631,18 @@ static ssize_t batadv_store_gw_bwidth(struct kobject *kobj, struct attribute *attr, char *buff, size_t count) { + struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); struct net_device *net_dev = batadv_kobj_to_netdev(kobj); + ssize_t length; if (buff[count - 1] == '\n') buff[count - 1] = '\0'; - return batadv_gw_bandwidth_set(net_dev, buff, count); + length = batadv_gw_bandwidth_set(net_dev, buff, count); + + batadv_netlink_notify_mesh(bat_priv); + + return length; } /** @@ -673,6 +710,8 @@ static ssize_t batadv_store_isolation_mark(struct kobject *kobj, "New skb mark for extended isolation: %#.8x/%#.8x\n", bat_priv->isolation_mark, bat_priv->isolation_mark_mask); + batadv_netlink_notify_mesh(bat_priv); + return count; } @@ -1077,6 +1116,7 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, struct attribute *attr, char *buff, size_t count) { + struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); struct net_device *net_dev = batadv_kobj_to_netdev(kobj); struct batadv_hard_iface *hard_iface; u32 tp_override; @@ -1107,6 +1147,8 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, atomic_set(&hard_iface->bat_v.throughput_override, tp_override); + batadv_netlink_notify_hardif(bat_priv, hard_iface); + out: batadv_hardif_put(hard_iface); return count; diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h index c1e3fb69952d..705ffbe763f4 100644 --- a/net/batman-adv/sysfs.h +++ b/net/batman-adv/sysfs.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors: * * Marek Lindner * diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 11520de96ccb..500109bbd551 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2012-2019 B.A.T.M.A.N. contributors: * * Edo Monticelli, Antonio Quartulli * diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h index 68e600974759..6b4d0f733896 100644 --- a/net/batman-adv/tp_meter.h +++ b/net/batman-adv/tp_meter.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2012-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2012-2019 B.A.T.M.A.N. contributors: * * Edo Monticelli, Antonio Quartulli * diff --git a/net/batman-adv/trace.c b/net/batman-adv/trace.c index 8e1024217cff..f77c917ed20d 100644 --- a/net/batman-adv/trace.c +++ b/net/batman-adv/trace.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors: * * Sven Eckelmann * diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h index 104784be94d7..5e5579051400 100644 --- a/net/batman-adv/trace.h +++ b/net/batman-adv/trace.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2010-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2010-2019 B.A.T.M.A.N. contributors: * * Sven Eckelmann * diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 8dcd4968cde7..f73d79139ae7 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli * diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 01b6c8eafaf9..61bca75e5911 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli * diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 40e69c9346d2..7e947b01919d 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h index ef5867f49824..c0f033b1acb8 100644 --- a/net/batman-adv/tvlv.h +++ b/net/batman-adv/tvlv.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index cbe17da36fcb..a21b34ed6548 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 1506e1632394..65228bfa4487 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1383,9 +1383,9 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, if (mask & HCI_CMSG_TSTAMP) { #ifdef CONFIG_COMPAT - struct compat_timeval ctv; + struct old_timeval32 ctv; #endif - struct timeval tv; + struct __kernel_old_timeval tv; void *data; int len; diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile index 0947ee7f70d5..5d6c7760142d 100644 --- a/net/bpfilter/Makefile +++ b/net/bpfilter/Makefile @@ -5,7 +5,6 @@ hostprogs-y := bpfilter_umh bpfilter_umh-objs := main.o -KBUILD_HOSTCFLAGS += -I. -Itools/include/ -Itools/include/uapi HOSTCC := $(CC) ifeq ($(CONFIG_BPFILTER_UMH), y) diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c index 1317f108df8a..61ce8454a88e 100644 --- a/net/bpfilter/main.c +++ b/net/bpfilter/main.c @@ -6,7 +6,7 @@ #include <sys/socket.h> #include <fcntl.h> #include <unistd.h> -#include "include/uapi/linux/bpf.h" +#include "../../include/uapi/linux/bpf.h" #include <asm/unistd.h> #include "msgfmt.h" diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 780757b7a82f..4a048fd1cbea 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1018,8 +1018,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, if (!nsrcs) return -EINVAL; - grec_len = sizeof(*grec) + - sizeof(struct in6_addr) * ntohs(*nsrcs); + grec_len = struct_size(grec, grec_src, ntohs(*nsrcs)); if (!ipv6_mc_may_pull(skb, len + grec_len)) return -EINVAL; @@ -1841,7 +1840,7 @@ static void br_ip4_multicast_join_snoopers(struct net_bridge *br) if (!in_dev) return; - ip_mc_inc_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP)); + __ip_mc_inc_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP), GFP_ATOMIC); in_dev_put(in_dev); } @@ -1872,7 +1871,7 @@ static void br_ip4_multicast_leave_snoopers(struct net_bridge *br) if (WARN_ON(!in_dev)) return; - ip_mc_dec_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP)); + __ip_mc_dec_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP), GFP_ATOMIC); in_dev_put(in_dev); } diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 4d2b9eb7604a..db9e8ab96d48 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -14,7 +14,7 @@ static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) /* dev is yet to be added to the port list. */ list_for_each_entry(p, &br->port_list, list) { - if (switchdev_port_same_parent_id(dev, p->dev)) + if (netdev_port_same_parent_id(dev, p->dev)) return p->offload_fwd_mark; } @@ -23,15 +23,12 @@ static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) int nbp_switchdev_mark_set(struct net_bridge_port *p) { - struct switchdev_attr attr = { - .orig_dev = p->dev, - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - }; + struct netdev_phys_item_id ppid = { }; int err; ASSERT_RTNL(); - err = switchdev_port_attr_get(p->dev, &attr); + err = dev_get_port_parent_id(p->dev, &ppid, true); if (err) { if (err == -EOPNOTSUPP) return 0; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 5e55cef0cec3..6693e209efe8 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2293,9 +2293,12 @@ static int compat_do_replace(struct net *net, void __user *user, xt_compat_lock(NFPROTO_BRIDGE); - ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); - if (ret < 0) - goto out_unlock; + if (tmp.nentries) { + ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); + if (ret < 0) + goto out_unlock; + } + ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); if (ret < 0) goto out_unlock; diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 38c2b7a890dd..37ac5ca0ffdf 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -319,16 +319,12 @@ struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, if (tmppkt == NULL) return NULL; tmp = pkt_to_skb(tmppkt); - skb_set_tail_pointer(tmp, dstlen); - tmp->len = dstlen; - memcpy(tmp->data, dst->data, dstlen); + skb_put_data(tmp, dst->data, dstlen); cfpkt_destroy(dstpkt); dst = tmp; } - memcpy(skb_tail_pointer(dst), add->data, skb_headlen(add)); + skb_put_data(dst, add->data, skb_headlen(add)); cfpkt_destroy(addpkt); - dst->tail += addlen; - dst->len += addlen; return skb_to_pkt(dst); } @@ -359,13 +355,11 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos) if (skb2 == NULL) return NULL; + skb_put_data(skb2, split, len2nd); + /* Reduce the length of the original packet */ - skb_set_tail_pointer(skb, pos); - skb->len = pos; + skb_trim(skb, pos); - memcpy(skb2->data, split, len2nd); - skb2->tail += len2nd; - skb2->len += len2nd; skb2->priority = skb->priority; return skb_to_pkt(skb2); } diff --git a/net/compat.c b/net/compat.c index 959d1c51826d..9629f053d4fa 100644 --- a/net/compat.c +++ b/net/compat.c @@ -209,8 +209,8 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat { struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control; struct compat_cmsghdr cmhdr; - struct compat_timeval ctv; - struct compat_timespec cts[3]; + struct old_timeval32 ctv; + struct old_timespec32 cts[3]; int cmlen; if (cm == NULL || kmsg->msg_controllen < sizeof(*cm)) { @@ -219,16 +219,16 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat } if (!COMPAT_USE_64BIT_TIME) { - if (level == SOL_SOCKET && type == SCM_TIMESTAMP) { - struct timeval *tv = (struct timeval *)data; + if (level == SOL_SOCKET && type == SO_TIMESTAMP_OLD) { + struct __kernel_old_timeval *tv = (struct __kernel_old_timeval *)data; ctv.tv_sec = tv->tv_sec; ctv.tv_usec = tv->tv_usec; data = &ctv; len = sizeof(ctv); } if (level == SOL_SOCKET && - (type == SCM_TIMESTAMPNS || type == SCM_TIMESTAMPING)) { - int count = type == SCM_TIMESTAMPNS ? 1 : 3; + (type == SO_TIMESTAMPNS_OLD || type == SO_TIMESTAMPING_OLD)) { + int count = type == SO_TIMESTAMPNS_OLD ? 1 : 3; int i; struct timespec *ts = (struct timespec *)data; for (i = 0; i < count; i++) { @@ -348,28 +348,6 @@ static int do_set_attach_filter(struct socket *sock, int level, int optname, sizeof(struct sock_fprog)); } -static int do_set_sock_timeout(struct socket *sock, int level, - int optname, char __user *optval, unsigned int optlen) -{ - struct compat_timeval __user *up = (struct compat_timeval __user *)optval; - struct timeval ktime; - mm_segment_t old_fs; - int err; - - if (optlen < sizeof(*up)) - return -EINVAL; - if (!access_ok(up, sizeof(*up)) || - __get_user(ktime.tv_sec, &up->tv_sec) || - __get_user(ktime.tv_usec, &up->tv_usec)) - return -EFAULT; - old_fs = get_fs(); - set_fs(KERNEL_DS); - err = sock_setsockopt(sock, level, optname, (char *)&ktime, sizeof(ktime)); - set_fs(old_fs); - - return err; -} - static int compat_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { @@ -377,10 +355,6 @@ static int compat_sock_setsockopt(struct socket *sock, int level, int optname, optname == SO_ATTACH_REUSEPORT_CBPF) return do_set_attach_filter(sock, level, optname, optval, optlen); - if (!COMPAT_USE_64BIT_TIME && - (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)) - return do_set_sock_timeout(sock, level, optname, optval, optlen); - return sock_setsockopt(sock, level, optname, optval, optlen); } @@ -417,44 +391,6 @@ COMPAT_SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, return __compat_sys_setsockopt(fd, level, optname, optval, optlen); } -static int do_get_sock_timeout(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) -{ - struct compat_timeval __user *up; - struct timeval ktime; - mm_segment_t old_fs; - int len, err; - - up = (struct compat_timeval __user *) optval; - if (get_user(len, optlen)) - return -EFAULT; - if (len < sizeof(*up)) - return -EINVAL; - len = sizeof(ktime); - old_fs = get_fs(); - set_fs(KERNEL_DS); - err = sock_getsockopt(sock, level, optname, (char *) &ktime, &len); - set_fs(old_fs); - - if (!err) { - if (put_user(sizeof(*up), optlen) || - !access_ok(up, sizeof(*up)) || - __put_user(ktime.tv_sec, &up->tv_sec) || - __put_user(ktime.tv_usec, &up->tv_usec)) - err = -EFAULT; - } - return err; -} - -static int compat_sock_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) -{ - if (!COMPAT_USE_64BIT_TIME && - (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)) - return do_get_sock_timeout(sock, level, optname, optval, optlen); - return sock_getsockopt(sock, level, optname, optval, optlen); -} - int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) { struct compat_timeval __user *ctv; @@ -527,7 +463,7 @@ static int __compat_sys_getsockopt(int fd, int level, int optname, } if (level == SOL_SOCKET) - err = compat_sock_getsockopt(sock, level, + err = sock_getsockopt(sock, level, optname, optval, optlen); else if (sock->ops->compat_getsockopt) err = sock->ops->compat_getsockopt(sock, level, diff --git a/net/core/Makefile b/net/core/Makefile index fccd31e0e7f7..f97d6254e564 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ - fib_notifier.o xdp.o + fib_notifier.o xdp.o flow_offload.o obj-y += net-sysfs.o obj-$(CONFIG_PAGE_POOL) += page_pool.o diff --git a/net/core/dev.c b/net/core/dev.c index 82f20022259d..ecbe419e05ab 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7878,6 +7878,63 @@ int dev_get_phys_port_name(struct net_device *dev, EXPORT_SYMBOL(dev_get_phys_port_name); /** + * dev_get_port_parent_id - Get the device's port parent identifier + * @dev: network device + * @ppid: pointer to a storage for the port's parent identifier + * @recurse: allow/disallow recursion to lower devices + * + * Get the devices's port parent identifier + */ +int dev_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid, + bool recurse) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct netdev_phys_item_id first = { }; + struct net_device *lower_dev; + struct list_head *iter; + int err = -EOPNOTSUPP; + + if (ops->ndo_get_port_parent_id) + return ops->ndo_get_port_parent_id(dev, ppid); + + if (!recurse) + return err; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = dev_get_port_parent_id(lower_dev, ppid, recurse); + if (err) + break; + if (!first.id_len) + first = *ppid; + else if (memcmp(&first, ppid, sizeof(*ppid))) + return -ENODATA; + } + + return err; +} +EXPORT_SYMBOL(dev_get_port_parent_id); + +/** + * netdev_port_same_parent_id - Indicate if two network devices have + * the same port parent identifier + * @a: first network device + * @b: second network device + */ +bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) +{ + struct netdev_phys_item_id a_id = { }; + struct netdev_phys_item_id b_id = { }; + + if (dev_get_port_parent_id(a, &a_id, true) || + dev_get_port_parent_id(b, &b_id, true)) + return false; + + return netdev_phys_item_id_same(&a_id, &b_id); +} +EXPORT_SYMBOL(netdev_port_same_parent_id); + +/** * dev_change_proto_down - update protocol port state information * @dev: device * @proto_down: new value @@ -7976,35 +8033,41 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, enum bpf_netdev_command query; struct bpf_prog *prog = NULL; bpf_op_t bpf_op, bpf_chk; + bool offload; int err; ASSERT_RTNL(); - query = flags & XDP_FLAGS_HW_MODE ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG; + offload = flags & XDP_FLAGS_HW_MODE; + query = offload ? XDP_QUERY_PROG_HW : XDP_QUERY_PROG; bpf_op = bpf_chk = ops->ndo_bpf; - if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) + if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) { + NL_SET_ERR_MSG(extack, "underlying driver does not support XDP in native mode"); return -EOPNOTSUPP; + } if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE)) bpf_op = generic_xdp_install; if (bpf_op == bpf_chk) bpf_chk = generic_xdp_install; if (fd >= 0) { - if (__dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG) || - __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG_HW)) + if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) { + NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time"); return -EEXIST; + } if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && - __dev_xdp_query(dev, bpf_op, query)) + __dev_xdp_query(dev, bpf_op, query)) { + NL_SET_ERR_MSG(extack, "XDP program already attached"); return -EBUSY; + } prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, bpf_op == ops->ndo_bpf); if (IS_ERR(prog)) return PTR_ERR(prog); - if (!(flags & XDP_FLAGS_HW_MODE) && - bpf_prog_is_dev_bound(prog->aux)) { + if (!offload && bpf_prog_is_dev_bound(prog->aux)) { NL_SET_ERR_MSG(extack, "using device-bound program without HW_MODE flag is not supported"); bpf_prog_put(prog); return -EINVAL; @@ -8712,6 +8775,9 @@ int init_dummy_netdev(struct net_device *dev) set_bit(__LINK_STATE_PRESENT, &dev->state); set_bit(__LINK_STATE_START, &dev->state); + /* napi_busy_loop stats accounting wants this */ + dev_net_set(dev, &init_net); + /* Note : We dont allocate pcpu_refcnt for dummy devices, * because users of this 'device' dont need to change * its refcount. diff --git a/net/core/devlink.c b/net/core/devlink.c index abb0da9d7b4b..04d98550c78c 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -81,6 +81,7 @@ struct devlink_dpipe_header devlink_dpipe_header_ipv6 = { EXPORT_SYMBOL(devlink_dpipe_header_ipv6); EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg); +EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr); static LIST_HEAD(devlink_list); @@ -115,6 +116,8 @@ static struct devlink *devlink_get_from_attrs(struct net *net, busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]); devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]); + lockdep_assert_held(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { if (strcmp(devlink->dev->bus->name, busname) == 0 && strcmp(dev_name(devlink->dev), devname) == 0 && @@ -932,6 +935,9 @@ static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink, if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE, pool_info.threshold_type)) goto nla_put_failure; + if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_CELL_SIZE, + pool_info.cell_size)) + goto nla_put_failure; genlmsg_end(msg, hdr); return 0; @@ -2656,6 +2662,27 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) return devlink->ops->reload(devlink, info->extack); } +static int devlink_nl_cmd_flash_update(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const char *file_name, *component; + struct nlattr *nla_component; + + if (!devlink->ops->flash_update) + return -EOPNOTSUPP; + + if (!info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]) + return -EINVAL; + file_name = nla_data(info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]); + + nla_component = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT]; + component = nla_component ? nla_data(nla_component) : NULL; + + return devlink->ops->flash_update(devlink, file_name, component, + info->extack); +} + static const struct devlink_param devlink_param_generic[] = { { .id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET, @@ -2843,11 +2870,13 @@ nla_put_failure: } static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, + unsigned int port_index, struct devlink_param_item *param_item, enum devlink_command cmd, u32 portid, u32 seq, int flags) { union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1]; + bool param_value_set[DEVLINK_PARAM_CMODE_MAX + 1] = {}; const struct devlink_param *param = param_item->param; struct devlink_param_gset_ctx ctx; struct nlattr *param_values_list; @@ -2866,12 +2895,15 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, return -EOPNOTSUPP; param_value[i] = param_item->driverinit_value; } else { + if (!param_item->published) + continue; ctx.cmode = i; err = devlink_param_get(devlink, param, &ctx); if (err) return err; param_value[i] = ctx.val; } + param_value_set[i] = true; } hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); @@ -2880,6 +2912,13 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, if (devlink_nl_put_handle(msg, devlink)) goto genlmsg_cancel; + + if (cmd == DEVLINK_CMD_PORT_PARAM_GET || + cmd == DEVLINK_CMD_PORT_PARAM_NEW || + cmd == DEVLINK_CMD_PORT_PARAM_DEL) + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, port_index)) + goto genlmsg_cancel; + param_attr = nla_nest_start(msg, DEVLINK_ATTR_PARAM); if (!param_attr) goto genlmsg_cancel; @@ -2899,7 +2938,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, goto param_nest_cancel; for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) { - if (!devlink_param_cmode_is_supported(param, i)) + if (!param_value_set[i]) continue; err = devlink_nl_param_value_fill_one(msg, param->type, i, param_value[i]); @@ -2922,18 +2961,22 @@ genlmsg_cancel: } static void devlink_param_notify(struct devlink *devlink, + unsigned int port_index, struct devlink_param_item *param_item, enum devlink_command cmd) { struct sk_buff *msg; int err; - WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL); + WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL && + cmd != DEVLINK_CMD_PORT_PARAM_NEW && + cmd != DEVLINK_CMD_PORT_PARAM_DEL); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; - err = devlink_nl_param_fill(msg, devlink, param_item, cmd, 0, 0, 0); + err = devlink_nl_param_fill(msg, devlink, port_index, param_item, cmd, + 0, 0, 0); if (err) { nlmsg_free(msg); return; @@ -2962,7 +3005,7 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, idx++; continue; } - err = devlink_nl_param_fill(msg, devlink, param_item, + err = devlink_nl_param_fill(msg, devlink, 0, param_item, DEVLINK_CMD_PARAM_GET, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -3051,7 +3094,7 @@ devlink_param_value_get_from_info(const struct devlink_param *param, } static struct devlink_param_item * -devlink_param_get_from_info(struct devlink *devlink, +devlink_param_get_from_info(struct list_head *param_list, struct genl_info *info) { char *param_name; @@ -3060,7 +3103,7 @@ devlink_param_get_from_info(struct devlink *devlink, return NULL; param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]); - return devlink_param_find_by_name(&devlink->param_list, param_name); + return devlink_param_find_by_name(param_list, param_name); } static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb, @@ -3071,7 +3114,7 @@ static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb, struct sk_buff *msg; int err; - param_item = devlink_param_get_from_info(devlink, info); + param_item = devlink_param_get_from_info(&devlink->param_list, info); if (!param_item) return -EINVAL; @@ -3079,7 +3122,7 @@ static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb, if (!msg) return -ENOMEM; - err = devlink_nl_param_fill(msg, devlink, param_item, + err = devlink_nl_param_fill(msg, devlink, 0, param_item, DEVLINK_CMD_PARAM_GET, info->snd_portid, info->snd_seq, 0); if (err) { @@ -3090,10 +3133,12 @@ static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb, return genlmsg_reply(msg, info); } -static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, - struct genl_info *info) +static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink, + unsigned int port_index, + struct list_head *param_list, + struct genl_info *info, + enum devlink_command cmd) { - struct devlink *devlink = info->user_ptr[0]; enum devlink_param_type param_type; struct devlink_param_gset_ctx ctx; enum devlink_param_cmode cmode; @@ -3102,7 +3147,7 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, union devlink_param_value value; int err = 0; - param_item = devlink_param_get_from_info(devlink, info); + param_item = devlink_param_get_from_info(param_list, info); if (!param_item) return -EINVAL; param = param_item->param; @@ -3142,17 +3187,28 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, return err; } - devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW); + devlink_param_notify(devlink, port_index, param_item, cmd); return 0; } +static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + + return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->param_list, + info, DEVLINK_CMD_PARAM_NEW); +} + static int devlink_param_register_one(struct devlink *devlink, - const struct devlink_param *param) + unsigned int port_index, + struct list_head *param_list, + const struct devlink_param *param, + enum devlink_command cmd) { struct devlink_param_item *param_item; - if (devlink_param_find_by_name(&devlink->param_list, - param->name)) + if (devlink_param_find_by_name(param_list, param->name)) return -EEXIST; if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT)) @@ -3165,24 +3221,111 @@ static int devlink_param_register_one(struct devlink *devlink, return -ENOMEM; param_item->param = param; - list_add_tail(¶m_item->list, &devlink->param_list); - devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW); + list_add_tail(¶m_item->list, param_list); + devlink_param_notify(devlink, port_index, param_item, cmd); return 0; } static void devlink_param_unregister_one(struct devlink *devlink, - const struct devlink_param *param) + unsigned int port_index, + struct list_head *param_list, + const struct devlink_param *param, + enum devlink_command cmd) { struct devlink_param_item *param_item; - param_item = devlink_param_find_by_name(&devlink->param_list, - param->name); + param_item = devlink_param_find_by_name(param_list, param->name); WARN_ON(!param_item); - devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_DEL); + devlink_param_notify(devlink, port_index, param_item, cmd); list_del(¶m_item->list); kfree(param_item); } +static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink_param_item *param_item; + struct devlink_port *devlink_port; + struct devlink *devlink; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + mutex_lock(&devlink->lock); + list_for_each_entry(devlink_port, &devlink->port_list, list) { + list_for_each_entry(param_item, + &devlink_port->param_list, list) { + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_param_fill(msg, + devlink_port->devlink, + devlink_port->index, param_item, + DEVLINK_CMD_PORT_PARAM_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + mutex_unlock(&devlink->lock); + goto out; + } + idx++; + } + } + mutex_unlock(&devlink->lock); + } +out: + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_param_item *param_item; + struct sk_buff *msg; + int err; + + param_item = devlink_param_get_from_info(&devlink_port->param_list, + info); + if (!param_item) + return -EINVAL; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_param_fill(msg, devlink_port->devlink, + devlink_port->index, param_item, + DEVLINK_CMD_PORT_PARAM_GET, + info->snd_portid, info->snd_seq, 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_port *devlink_port = info->user_ptr[0]; + + return __devlink_nl_cmd_param_set_doit(devlink_port->devlink, + devlink_port->index, + &devlink_port->param_list, info, + DEVLINK_CMD_PORT_PARAM_NEW); +} + static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg, struct devlink *devlink, struct devlink_snapshot *snapshot) @@ -3504,44 +3647,56 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { u64 ret_offset, start_offset, end_offset = 0; - struct nlattr *attrs[DEVLINK_ATTR_MAX + 1]; const struct genl_ops *ops = cb->data; struct devlink_region *region; struct nlattr *chunks_attr; const char *region_name; struct devlink *devlink; + struct nlattr **attrs; bool dump = true; void *hdr; int err; start_offset = *((u64 *)&cb->args[0]); + attrs = kmalloc_array(DEVLINK_ATTR_MAX + 1, sizeof(*attrs), GFP_KERNEL); + if (!attrs) + return -ENOMEM; + err = nlmsg_parse(cb->nlh, GENL_HDRLEN + devlink_nl_family.hdrsize, attrs, DEVLINK_ATTR_MAX, ops->policy, cb->extack); if (err) - goto out; + goto out_free; + mutex_lock(&devlink_mutex); devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs); - if (IS_ERR(devlink)) - goto out; + if (IS_ERR(devlink)) { + err = PTR_ERR(devlink); + goto out_dev; + } - mutex_lock(&devlink_mutex); mutex_lock(&devlink->lock); if (!attrs[DEVLINK_ATTR_REGION_NAME] || - !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) + !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) { + err = -EINVAL; goto out_unlock; + } region_name = nla_data(attrs[DEVLINK_ATTR_REGION_NAME]); region = devlink_region_get_by_name(devlink, region_name); - if (!region) + if (!region) { + err = -EINVAL; goto out_unlock; + } hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, DEVLINK_CMD_REGION_READ); - if (!hdr) + if (!hdr) { + err = -EMSGSIZE; goto out_unlock; + } err = devlink_nl_put_handle(skb, devlink); if (err) @@ -3552,8 +3707,10 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto nla_put_failure; chunks_attr = nla_nest_start(skb, DEVLINK_ATTR_REGION_CHUNKS); - if (!chunks_attr) + if (!chunks_attr) { + err = -EMSGSIZE; goto nla_put_failure; + } if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] && attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) { @@ -3576,8 +3733,10 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto nla_put_failure; /* Check if there was any progress done to prevent infinite loop */ - if (ret_offset == start_offset) + if (ret_offset == start_offset) { + err = -EINVAL; goto nla_put_failure; + } *((u64 *)&cb->args[0]) = ret_offset; @@ -3585,6 +3744,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, genlmsg_end(skb, hdr); mutex_unlock(&devlink->lock); mutex_unlock(&devlink_mutex); + kfree(attrs); return skb->len; @@ -3592,8 +3752,1127 @@ nla_put_failure: genlmsg_cancel(skb, hdr); out_unlock: mutex_unlock(&devlink->lock); +out_dev: + mutex_unlock(&devlink_mutex); +out_free: + kfree(attrs); + return err; +} + +struct devlink_info_req { + struct sk_buff *msg; +}; + +int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name) +{ + return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name); +} +EXPORT_SYMBOL_GPL(devlink_info_driver_name_put); + +int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn) +{ + return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn); +} +EXPORT_SYMBOL_GPL(devlink_info_serial_number_put); + +static int devlink_info_version_put(struct devlink_info_req *req, int attr, + const char *version_name, + const char *version_value) +{ + struct nlattr *nest; + int err; + + nest = nla_nest_start(req->msg, attr); + if (!nest) + return -EMSGSIZE; + + err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_NAME, + version_name); + if (err) + goto nla_put_failure; + + err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_VALUE, + version_value); + if (err) + goto nla_put_failure; + + nla_nest_end(req->msg, nest); + + return 0; + +nla_put_failure: + nla_nest_cancel(req->msg, nest); + return err; +} + +int devlink_info_version_fixed_put(struct devlink_info_req *req, + const char *version_name, + const char *version_value) +{ + return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED, + version_name, version_value); +} +EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put); + +int devlink_info_version_stored_put(struct devlink_info_req *req, + const char *version_name, + const char *version_value) +{ + return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED, + version_name, version_value); +} +EXPORT_SYMBOL_GPL(devlink_info_version_stored_put); + +int devlink_info_version_running_put(struct devlink_info_req *req, + const char *version_name, + const char *version_value) +{ + return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING, + version_name, version_value); +} +EXPORT_SYMBOL_GPL(devlink_info_version_running_put); + +static int +devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink, + enum devlink_command cmd, u32 portid, + u32 seq, int flags, struct netlink_ext_ack *extack) +{ + struct devlink_info_req req; + void *hdr; + int err; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + err = -EMSGSIZE; + if (devlink_nl_put_handle(msg, devlink)) + goto err_cancel_msg; + + req.msg = msg; + err = devlink->ops->info_get(devlink, &req, extack); + if (err) + goto err_cancel_msg; + + genlmsg_end(msg, hdr); + return 0; + +err_cancel_msg: + genlmsg_cancel(msg, hdr); + return err; +} + +static int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct sk_buff *msg; + int err; + + if (!devlink->ops || !devlink->ops->info_get) + return -EOPNOTSUPP; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, + info->snd_portid, info->snd_seq, 0, + info->extack); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink *devlink; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + if (idx < start) { + idx++; + continue; + } + + mutex_lock(&devlink->lock); + err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + cb->extack); + mutex_unlock(&devlink->lock); + if (err) + break; + idx++; + } + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +struct devlink_fmsg_item { + struct list_head list; + int attrtype; + u8 nla_type; + u16 len; + int value[0]; +}; + +struct devlink_fmsg { + struct list_head item_list; +}; + +static struct devlink_fmsg *devlink_fmsg_alloc(void) +{ + struct devlink_fmsg *fmsg; + + fmsg = kzalloc(sizeof(*fmsg), GFP_KERNEL); + if (!fmsg) + return NULL; + + INIT_LIST_HEAD(&fmsg->item_list); + + return fmsg; +} + +static void devlink_fmsg_free(struct devlink_fmsg *fmsg) +{ + struct devlink_fmsg_item *item, *tmp; + + list_for_each_entry_safe(item, tmp, &fmsg->item_list, list) { + list_del(&item->list); + kfree(item); + } + kfree(fmsg); +} + +static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg, + int attrtype) +{ + struct devlink_fmsg_item *item; + + item = kzalloc(sizeof(*item), GFP_KERNEL); + if (!item) + return -ENOMEM; + + item->attrtype = attrtype; + list_add_tail(&item->list, &fmsg->item_list); + + return 0; +} + +int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg) +{ + return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_start); + +static int devlink_fmsg_nest_end(struct devlink_fmsg *fmsg) +{ + return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END); +} + +int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg) +{ + return devlink_fmsg_nest_end(fmsg); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_end); + +#define DEVLINK_FMSG_MAX_SIZE (GENLMSG_DEFAULT_SIZE - GENL_HDRLEN - NLA_HDRLEN) + +static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name) +{ + struct devlink_fmsg_item *item; + + if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE) + return -EMSGSIZE; + + item = kzalloc(sizeof(*item) + strlen(name) + 1, GFP_KERNEL); + if (!item) + return -ENOMEM; + + item->nla_type = NLA_NUL_STRING; + item->len = strlen(name) + 1; + item->attrtype = DEVLINK_ATTR_FMSG_OBJ_NAME; + memcpy(&item->value, name, item->len); + list_add_tail(&item->list, &fmsg->item_list); + + return 0; +} + +int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name) +{ + int err; + + err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START); + if (err) + return err; + + err = devlink_fmsg_put_name(fmsg, name); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_start); + +int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg) +{ + return devlink_fmsg_nest_end(fmsg); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end); + +int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_start); + +int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg) +{ + int err; + + err = devlink_fmsg_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_end); + +static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg, + const void *value, u16 value_len, + u8 value_nla_type) +{ + struct devlink_fmsg_item *item; + + if (value_len > DEVLINK_FMSG_MAX_SIZE) + return -EMSGSIZE; + + item = kzalloc(sizeof(*item) + value_len, GFP_KERNEL); + if (!item) + return -ENOMEM; + + item->nla_type = value_nla_type; + item->len = value_len; + item->attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA; + memcpy(&item->value, value, item->len); + list_add_tail(&item->list, &fmsg->item_list); + + return 0; +} + +int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) +{ + return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_bool_put); + +int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) +{ + return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_u8_put); + +int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) +{ + return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put); + +int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) +{ + return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_u64_put); + +int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value) +{ + return devlink_fmsg_put_value(fmsg, value, strlen(value) + 1, + NLA_NUL_STRING); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_string_put); + +int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, + u16 value_len) +{ + return devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put); + +int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, + bool value) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_bool_put(fmsg, value); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_bool_pair_put); + +int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, + u8 value) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_u8_put(fmsg, value); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_u8_pair_put); + +int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, + u32 value) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_u32_put(fmsg, value); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_u32_pair_put); + +int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, + u64 value) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_u64_put(fmsg, value); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_u64_pair_put); + +int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, + const char *value) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_string_put(fmsg, value); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_string_pair_put); + +int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, + const void *value, u16 value_len) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_binary_put(fmsg, value, value_len); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_put); + +static int +devlink_fmsg_item_fill_type(struct devlink_fmsg_item *msg, struct sk_buff *skb) +{ + switch (msg->nla_type) { + case NLA_FLAG: + case NLA_U8: + case NLA_U32: + case NLA_U64: + case NLA_NUL_STRING: + case NLA_BINARY: + return nla_put_u8(skb, DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE, + msg->nla_type); + default: + return -EINVAL; + } +} + +static int +devlink_fmsg_item_fill_data(struct devlink_fmsg_item *msg, struct sk_buff *skb) +{ + int attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA; + u8 tmp; + + switch (msg->nla_type) { + case NLA_FLAG: + /* Always provide flag data, regardless of its value */ + tmp = *(bool *) msg->value; + + return nla_put_u8(skb, attrtype, tmp); + case NLA_U8: + return nla_put_u8(skb, attrtype, *(u8 *) msg->value); + case NLA_U32: + return nla_put_u32(skb, attrtype, *(u32 *) msg->value); + case NLA_U64: + return nla_put_u64_64bit(skb, attrtype, *(u64 *) msg->value, + DEVLINK_ATTR_PAD); + case NLA_NUL_STRING: + return nla_put_string(skb, attrtype, (char *) &msg->value); + case NLA_BINARY: + return nla_put(skb, attrtype, msg->len, (void *) &msg->value); + default: + return -EINVAL; + } +} + +static int +devlink_fmsg_prepare_skb(struct devlink_fmsg *fmsg, struct sk_buff *skb, + int *start) +{ + struct devlink_fmsg_item *item; + struct nlattr *fmsg_nlattr; + int i = 0; + int err; + + fmsg_nlattr = nla_nest_start(skb, DEVLINK_ATTR_FMSG); + if (!fmsg_nlattr) + return -EMSGSIZE; + + list_for_each_entry(item, &fmsg->item_list, list) { + if (i < *start) { + i++; + continue; + } + + switch (item->attrtype) { + case DEVLINK_ATTR_FMSG_OBJ_NEST_START: + case DEVLINK_ATTR_FMSG_PAIR_NEST_START: + case DEVLINK_ATTR_FMSG_ARR_NEST_START: + case DEVLINK_ATTR_FMSG_NEST_END: + err = nla_put_flag(skb, item->attrtype); + break; + case DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA: + err = devlink_fmsg_item_fill_type(item, skb); + if (err) + break; + err = devlink_fmsg_item_fill_data(item, skb); + break; + case DEVLINK_ATTR_FMSG_OBJ_NAME: + err = nla_put_string(skb, item->attrtype, + (char *) &item->value); + break; + default: + err = -EINVAL; + break; + } + if (!err) + *start = ++i; + else + break; + } + + nla_nest_end(skb, fmsg_nlattr); + return err; +} + +static int devlink_fmsg_snd(struct devlink_fmsg *fmsg, + struct genl_info *info, + enum devlink_command cmd, int flags) +{ + struct nlmsghdr *nlh; + struct sk_buff *skb; + bool last = false; + int index = 0; + void *hdr; + int err; + + while (!last) { + int tmp_index = index; + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, + &devlink_nl_family, flags | NLM_F_MULTI, cmd); + if (!hdr) { + err = -EMSGSIZE; + goto nla_put_failure; + } + + err = devlink_fmsg_prepare_skb(fmsg, skb, &index); + if (!err) + last = true; + else if (err != -EMSGSIZE || tmp_index == index) + goto nla_put_failure; + + genlmsg_end(skb, hdr); + err = genlmsg_reply(skb, info); + if (err) + return err; + } + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = -EMSGSIZE; + goto nla_put_failure; + } + + return genlmsg_reply(skb, info); + +nla_put_failure: + nlmsg_free(skb); + return err; +} + +struct devlink_health_reporter { + struct list_head list; + void *priv; + const struct devlink_health_reporter_ops *ops; + struct devlink *devlink; + struct devlink_fmsg *dump_fmsg; + struct mutex dump_lock; /* lock parallel read/write from dump buffers */ + u64 graceful_period; + bool auto_recover; + u8 health_state; + u64 dump_ts; + u64 error_count; + u64 recovery_count; + u64 last_recovery_ts; +}; + +enum devlink_health_reporter_state { + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY, + DEVLINK_HEALTH_REPORTER_STATE_ERROR, +}; + +void * +devlink_health_reporter_priv(struct devlink_health_reporter *reporter) +{ + return reporter->priv; +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_priv); + +static struct devlink_health_reporter * +devlink_health_reporter_find_by_name(struct devlink *devlink, + const char *reporter_name) +{ + struct devlink_health_reporter *reporter; + + list_for_each_entry(reporter, &devlink->reporter_list, list) + if (!strcmp(reporter->ops->name, reporter_name)) + return reporter; + return NULL; +} + +/** + * devlink_health_reporter_create - create devlink health reporter + * + * @devlink: devlink + * @ops: ops + * @graceful_period: to avoid recovery loops, in msecs + * @auto_recover: auto recover when error occurs + * @priv: priv + */ +struct devlink_health_reporter * +devlink_health_reporter_create(struct devlink *devlink, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, bool auto_recover, + void *priv) +{ + struct devlink_health_reporter *reporter; + + mutex_lock(&devlink->lock); + if (devlink_health_reporter_find_by_name(devlink, ops->name)) { + reporter = ERR_PTR(-EEXIST); + goto unlock; + } + + if (WARN_ON(auto_recover && !ops->recover) || + WARN_ON(graceful_period && !ops->recover)) { + reporter = ERR_PTR(-EINVAL); + goto unlock; + } + + reporter = kzalloc(sizeof(*reporter), GFP_KERNEL); + if (!reporter) { + reporter = ERR_PTR(-ENOMEM); + goto unlock; + } + + reporter->priv = priv; + reporter->ops = ops; + reporter->devlink = devlink; + reporter->graceful_period = graceful_period; + reporter->auto_recover = auto_recover; + mutex_init(&reporter->dump_lock); + list_add_tail(&reporter->list, &devlink->reporter_list); +unlock: + mutex_unlock(&devlink->lock); + return reporter; +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_create); + +/** + * devlink_health_reporter_destroy - destroy devlink health reporter + * + * @reporter: devlink health reporter to destroy + */ +void +devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) +{ + mutex_lock(&reporter->devlink->lock); + list_del(&reporter->list); + mutex_unlock(&reporter->devlink->lock); + if (reporter->dump_fmsg) + devlink_fmsg_free(reporter->dump_fmsg); + kfree(reporter); +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); + +static int +devlink_health_reporter_recover(struct devlink_health_reporter *reporter, + void *priv_ctx) +{ + int err; + + if (!reporter->ops->recover) + return -EOPNOTSUPP; + + err = reporter->ops->recover(reporter, priv_ctx); + if (err) + return err; + + reporter->recovery_count++; + reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY; + reporter->last_recovery_ts = jiffies; + + return 0; +} + +static void +devlink_health_dump_clear(struct devlink_health_reporter *reporter) +{ + if (!reporter->dump_fmsg) + return; + devlink_fmsg_free(reporter->dump_fmsg); + reporter->dump_fmsg = NULL; +} + +static int devlink_health_do_dump(struct devlink_health_reporter *reporter, + void *priv_ctx) +{ + int err; + + if (!reporter->ops->dump) + return 0; + + if (reporter->dump_fmsg) + return 0; + + reporter->dump_fmsg = devlink_fmsg_alloc(); + if (!reporter->dump_fmsg) { + err = -ENOMEM; + return err; + } + + err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg); + if (err) + goto dump_err; + + err = reporter->ops->dump(reporter, reporter->dump_fmsg, + priv_ctx); + if (err) + goto dump_err; + + err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg); + if (err) + goto dump_err; + + reporter->dump_ts = jiffies; + + return 0; + +dump_err: + devlink_health_dump_clear(reporter); + return err; +} + +int devlink_health_report(struct devlink_health_reporter *reporter, + const char *msg, void *priv_ctx) +{ + struct devlink *devlink = reporter->devlink; + + /* write a log message of the current error */ + WARN_ON(!msg); + trace_devlink_health_report(devlink, reporter->ops->name, msg); + reporter->error_count++; + + /* abort if the previous error wasn't recovered */ + if (reporter->auto_recover && + (reporter->health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY || + jiffies - reporter->last_recovery_ts < + msecs_to_jiffies(reporter->graceful_period))) { + trace_devlink_health_recover_aborted(devlink, + reporter->ops->name, + reporter->health_state, + jiffies - + reporter->last_recovery_ts); + return -ECANCELED; + } + + reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; + + mutex_lock(&reporter->dump_lock); + /* store current dump of current error, for later analysis */ + devlink_health_do_dump(reporter, priv_ctx); + mutex_unlock(&reporter->dump_lock); + + if (reporter->auto_recover) + return devlink_health_reporter_recover(reporter, priv_ctx); + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_health_report); + +static struct devlink_health_reporter * +devlink_health_reporter_get_from_info(struct devlink *devlink, + struct genl_info *info) +{ + char *reporter_name; + + if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]) + return NULL; + + reporter_name = + nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]); + return devlink_health_reporter_find_by_name(devlink, reporter_name); +} + +static int +devlink_nl_health_reporter_fill(struct sk_buff *msg, + struct devlink *devlink, + struct devlink_health_reporter *reporter, + enum devlink_command cmd, u32 portid, + u32 seq, int flags) +{ + struct nlattr *reporter_attr; + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto genlmsg_cancel; + + reporter_attr = nla_nest_start(msg, DEVLINK_ATTR_HEALTH_REPORTER); + if (!reporter_attr) + goto genlmsg_cancel; + if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME, + reporter->ops->name)) + goto reporter_nest_cancel; + if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE, + reporter->health_state)) + goto reporter_nest_cancel; + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR, + reporter->error_count, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER, + reporter->recovery_count, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, + reporter->graceful_period, + DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, + reporter->auto_recover)) + goto reporter_nest_cancel; + if (reporter->dump_fmsg && + nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, + jiffies_to_msecs(reporter->dump_ts), + DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + + nla_nest_end(msg, reporter_attr); + genlmsg_end(msg, hdr); + return 0; + +reporter_nest_cancel: + nla_nest_end(msg, reporter_attr); +genlmsg_cancel: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + struct sk_buff *msg; + int err; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_health_reporter_fill(msg, devlink, reporter, + DEVLINK_CMD_HEALTH_REPORTER_GET, + info->snd_portid, info->snd_seq, + 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int +devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink_health_reporter *reporter; + struct devlink *devlink; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + mutex_lock(&devlink->lock); + list_for_each_entry(reporter, &devlink->reporter_list, + list) { + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_health_reporter_fill(msg, devlink, + reporter, + DEVLINK_CMD_HEALTH_REPORTER_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + mutex_unlock(&devlink->lock); + goto out; + } + idx++; + } + mutex_unlock(&devlink->lock); + } +out: mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int +devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->recover && + (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] || + info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) + return -EOPNOTSUPP; + + if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]) + reporter->graceful_period = + nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]); + + if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]) + reporter->auto_recover = + nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]); + + return 0; +} + +static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + return devlink_health_reporter_recover(reporter, NULL); +} + +static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + struct devlink_fmsg *fmsg; + int err; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->diagnose) + return -EOPNOTSUPP; + + fmsg = devlink_fmsg_alloc(); + if (!fmsg) + return -ENOMEM; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + goto out; + + err = reporter->ops->diagnose(reporter, fmsg); + if (err) + goto out; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + goto out; + + err = devlink_fmsg_snd(fmsg, info, + DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 0); + out: + devlink_fmsg_free(fmsg); + return err; +} + +static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + int err; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->dump) + return -EOPNOTSUPP; + + mutex_lock(&reporter->dump_lock); + err = devlink_health_do_dump(reporter, NULL); + if (err) + goto out; + + err = devlink_fmsg_snd(reporter->dump_fmsg, info, + DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, 0); + +out: + mutex_unlock(&reporter->dump_lock); + return err; +} + +static int +devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->dump) + return -EOPNOTSUPP; + + mutex_lock(&reporter->dump_lock); + devlink_health_dump_clear(reporter); + mutex_unlock(&reporter->dump_lock); return 0; } @@ -3622,6 +4901,11 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 }, + [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 }, + [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 }, + [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -3821,6 +5105,21 @@ static const struct genl_ops devlink_nl_ops[] = { .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, { + .cmd = DEVLINK_CMD_PORT_PARAM_GET, + .doit = devlink_nl_cmd_port_param_get_doit, + .dumpit = devlink_nl_cmd_port_param_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_PORT_PARAM_SET, + .doit = devlink_nl_cmd_port_param_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, + }, + { .cmd = DEVLINK_CMD_REGION_GET, .doit = devlink_nl_cmd_region_get_doit, .dumpit = devlink_nl_cmd_region_get_dumpit, @@ -3842,6 +5141,66 @@ static const struct genl_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, + { + .cmd = DEVLINK_CMD_INFO_GET, + .doit = devlink_nl_cmd_info_get_doit, + .dumpit = devlink_nl_cmd_info_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET, + .doit = devlink_nl_cmd_health_reporter_get_doit, + .dumpit = devlink_nl_cmd_health_reporter_get_dumpit, + .policy = devlink_nl_policy, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET, + .doit = devlink_nl_cmd_health_reporter_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, + .doit = devlink_nl_cmd_health_reporter_recover_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, + .doit = devlink_nl_cmd_health_reporter_diagnose_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, + .doit = devlink_nl_cmd_health_reporter_dump_get_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, + .doit = devlink_nl_cmd_health_reporter_dump_clear_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, + }, + { + .cmd = DEVLINK_CMD_FLASH_UPDATE, + .doit = devlink_nl_cmd_flash_update, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { @@ -3882,6 +5241,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) INIT_LIST_HEAD(&devlink->resource_list); INIT_LIST_HEAD(&devlink->param_list); INIT_LIST_HEAD(&devlink->region_list); + INIT_LIST_HEAD(&devlink->reporter_list); mutex_init(&devlink->lock); return devlink; } @@ -3924,6 +5284,14 @@ EXPORT_SYMBOL_GPL(devlink_unregister); */ void devlink_free(struct devlink *devlink) { + WARN_ON(!list_empty(&devlink->reporter_list)); + WARN_ON(!list_empty(&devlink->region_list)); + WARN_ON(!list_empty(&devlink->param_list)); + WARN_ON(!list_empty(&devlink->resource_list)); + WARN_ON(!list_empty(&devlink->dpipe_table_list)); + WARN_ON(!list_empty(&devlink->sb_list)); + WARN_ON(!list_empty(&devlink->port_list)); + kfree(devlink); } EXPORT_SYMBOL_GPL(devlink_free); @@ -3954,6 +5322,7 @@ int devlink_port_register(struct devlink *devlink, devlink_port->index = port_index; devlink_port->registered = true; list_add_tail(&devlink_port->list, &devlink->port_list); + INIT_LIST_HEAD(&devlink_port->param_list); mutex_unlock(&devlink->lock); devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); return 0; @@ -4471,18 +5840,23 @@ out: } EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister); -/** - * devlink_params_register - register configuration parameters - * - * @devlink: devlink - * @params: configuration parameters array - * @params_count: number of parameters provided - * - * Register the configuration parameters supported by the driver. - */ -int devlink_params_register(struct devlink *devlink, - const struct devlink_param *params, - size_t params_count) +static int devlink_param_verify(const struct devlink_param *param) +{ + if (!param || !param->name || !param->supported_cmodes) + return -EINVAL; + if (param->generic) + return devlink_param_generic_verify(param); + else + return devlink_param_driver_verify(param); +} + +static int __devlink_params_register(struct devlink *devlink, + unsigned int port_index, + struct list_head *param_list, + const struct devlink_param *params, + size_t params_count, + enum devlink_command reg_cmd, + enum devlink_command unreg_cmd) { const struct devlink_param *param = params; int i; @@ -4490,20 +5864,12 @@ int devlink_params_register(struct devlink *devlink, mutex_lock(&devlink->lock); for (i = 0; i < params_count; i++, param++) { - if (!param || !param->name || !param->supported_cmodes) { - err = -EINVAL; + err = devlink_param_verify(param); + if (err) goto rollback; - } - if (param->generic) { - err = devlink_param_generic_verify(param); - if (err) - goto rollback; - } else { - err = devlink_param_driver_verify(param); - if (err) - goto rollback; - } - err = devlink_param_register_one(devlink, param); + + err = devlink_param_register_one(devlink, port_index, + param_list, param, reg_cmd); if (err) goto rollback; } @@ -4515,11 +5881,48 @@ rollback: if (!i) goto unlock; for (param--; i > 0; i--, param--) - devlink_param_unregister_one(devlink, param); + devlink_param_unregister_one(devlink, port_index, param_list, + param, unreg_cmd); unlock: mutex_unlock(&devlink->lock); return err; } + +static void __devlink_params_unregister(struct devlink *devlink, + unsigned int port_index, + struct list_head *param_list, + const struct devlink_param *params, + size_t params_count, + enum devlink_command cmd) +{ + const struct devlink_param *param = params; + int i; + + mutex_lock(&devlink->lock); + for (i = 0; i < params_count; i++, param++) + devlink_param_unregister_one(devlink, 0, param_list, param, + cmd); + mutex_unlock(&devlink->lock); +} + +/** + * devlink_params_register - register configuration parameters + * + * @devlink: devlink + * @params: configuration parameters array + * @params_count: number of parameters provided + * + * Register the configuration parameters supported by the driver. + */ +int devlink_params_register(struct devlink *devlink, + const struct devlink_param *params, + size_t params_count) +{ + return __devlink_params_register(devlink, 0, &devlink->param_list, + params, params_count, + DEVLINK_CMD_PARAM_NEW, + DEVLINK_CMD_PARAM_DEL); +} EXPORT_SYMBOL_GPL(devlink_params_register); /** @@ -4532,36 +5935,103 @@ void devlink_params_unregister(struct devlink *devlink, const struct devlink_param *params, size_t params_count) { - const struct devlink_param *param = params; - int i; - - mutex_lock(&devlink->lock); - for (i = 0; i < params_count; i++, param++) - devlink_param_unregister_one(devlink, param); - mutex_unlock(&devlink->lock); + return __devlink_params_unregister(devlink, 0, &devlink->param_list, + params, params_count, + DEVLINK_CMD_PARAM_DEL); } EXPORT_SYMBOL_GPL(devlink_params_unregister); /** - * devlink_param_driverinit_value_get - get configuration parameter - * value for driver initializing + * devlink_params_publish - publish configuration parameters * * @devlink: devlink - * @param_id: parameter ID - * @init_val: value of parameter in driverinit configuration mode * - * This function should be used by the driver to get driverinit - * configuration for initialization after reload command. + * Publish previously registered configuration parameters. */ -int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, - union devlink_param_value *init_val) +void devlink_params_publish(struct devlink *devlink) { struct devlink_param_item *param_item; - if (!devlink->ops || !devlink->ops->reload) - return -EOPNOTSUPP; + list_for_each_entry(param_item, &devlink->param_list, list) { + if (param_item->published) + continue; + param_item->published = true; + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_NEW); + } +} +EXPORT_SYMBOL_GPL(devlink_params_publish); - param_item = devlink_param_find_by_id(&devlink->param_list, param_id); +/** + * devlink_params_unpublish - unpublish configuration parameters + * + * @devlink: devlink + * + * Unpublish previously registered configuration parameters. + */ +void devlink_params_unpublish(struct devlink *devlink) +{ + struct devlink_param_item *param_item; + + list_for_each_entry(param_item, &devlink->param_list, list) { + if (!param_item->published) + continue; + param_item->published = false; + devlink_param_notify(devlink, 0, param_item, + DEVLINK_CMD_PARAM_DEL); + } +} +EXPORT_SYMBOL_GPL(devlink_params_unpublish); + +/** + * devlink_port_params_register - register port configuration parameters + * + * @devlink_port: devlink port + * @params: configuration parameters array + * @params_count: number of parameters provided + * + * Register the configuration parameters supported by the port. + */ +int devlink_port_params_register(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count) +{ + return __devlink_params_register(devlink_port->devlink, + devlink_port->index, + &devlink_port->param_list, params, + params_count, + DEVLINK_CMD_PORT_PARAM_NEW, + DEVLINK_CMD_PORT_PARAM_DEL); +} +EXPORT_SYMBOL_GPL(devlink_port_params_register); + +/** + * devlink_port_params_unregister - unregister port configuration + * parameters + * + * @devlink_port: devlink port + * @params: configuration parameters array + * @params_count: number of parameters provided + */ +void devlink_port_params_unregister(struct devlink_port *devlink_port, + const struct devlink_param *params, + size_t params_count) +{ + return __devlink_params_unregister(devlink_port->devlink, + devlink_port->index, + &devlink_port->param_list, + params, params_count, + DEVLINK_CMD_PORT_PARAM_DEL); +} +EXPORT_SYMBOL_GPL(devlink_port_params_unregister); + +static int +__devlink_param_driverinit_value_get(struct list_head *param_list, u32 param_id, + union devlink_param_value *init_val) +{ + struct devlink_param_item *param_item; + + param_item = devlink_param_find_by_id(param_list, param_id); if (!param_item) return -EINVAL; @@ -4577,6 +6047,54 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, return 0; } + +static int +__devlink_param_driverinit_value_set(struct devlink *devlink, + unsigned int port_index, + struct list_head *param_list, u32 param_id, + union devlink_param_value init_val, + enum devlink_command cmd) +{ + struct devlink_param_item *param_item; + + param_item = devlink_param_find_by_id(param_list, param_id); + if (!param_item) + return -EINVAL; + + if (!devlink_param_cmode_is_supported(param_item->param, + DEVLINK_PARAM_CMODE_DRIVERINIT)) + return -EOPNOTSUPP; + + if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) + strcpy(param_item->driverinit_value.vstr, init_val.vstr); + else + param_item->driverinit_value = init_val; + param_item->driverinit_value_valid = true; + + devlink_param_notify(devlink, port_index, param_item, cmd); + return 0; +} + +/** + * devlink_param_driverinit_value_get - get configuration parameter + * value for driver initializing + * + * @devlink: devlink + * @param_id: parameter ID + * @init_val: value of parameter in driverinit configuration mode + * + * This function should be used by the driver to get driverinit + * configuration for initialization after reload command. + */ +int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, + union devlink_param_value *init_val) +{ + if (!devlink->ops || !devlink->ops->reload) + return -EOPNOTSUPP; + + return __devlink_param_driverinit_value_get(&devlink->param_list, + param_id, init_val); +} EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get); /** @@ -4594,26 +6112,61 @@ EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get); int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id, union devlink_param_value init_val) { - struct devlink_param_item *param_item; + return __devlink_param_driverinit_value_set(devlink, 0, + &devlink->param_list, + param_id, init_val, + DEVLINK_CMD_PARAM_NEW); +} +EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set); - param_item = devlink_param_find_by_id(&devlink->param_list, param_id); - if (!param_item) - return -EINVAL; +/** + * devlink_port_param_driverinit_value_get - get configuration parameter + * value for driver initializing + * + * @devlink_port: devlink_port + * @param_id: parameter ID + * @init_val: value of parameter in driverinit configuration mode + * + * This function should be used by the driver to get driverinit + * configuration for initialization after reload command. + */ +int devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port, + u32 param_id, + union devlink_param_value *init_val) +{ + struct devlink *devlink = devlink_port->devlink; - if (!devlink_param_cmode_is_supported(param_item->param, - DEVLINK_PARAM_CMODE_DRIVERINIT)) + if (!devlink->ops || !devlink->ops->reload) return -EOPNOTSUPP; - if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) - strcpy(param_item->driverinit_value.vstr, init_val.vstr); - else - param_item->driverinit_value = init_val; - param_item->driverinit_value_valid = true; + return __devlink_param_driverinit_value_get(&devlink_port->param_list, + param_id, init_val); +} +EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_get); - devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW); - return 0; +/** + * devlink_port_param_driverinit_value_set - set value of configuration + * parameter for driverinit + * configuration mode + * + * @devlink_port: devlink_port + * @param_id: parameter ID + * @init_val: value of parameter to set for driverinit configuration mode + * + * This function should be used by the driver to set driverinit + * configuration mode default value. + */ +int devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port, + u32 param_id, + union devlink_param_value init_val) +{ + return __devlink_param_driverinit_value_set(devlink_port->devlink, + devlink_port->index, + &devlink_port->param_list, + param_id, init_val, + DEVLINK_CMD_PORT_PARAM_NEW); } -EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set); +EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_set); /** * devlink_param_value_changed - notify devlink on a parameter's value @@ -4626,7 +6179,6 @@ EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set); * This function should be used by the driver to notify devlink on value * change, excluding driverinit configuration mode. * For driverinit configuration mode driver should use the function - * devlink_param_driverinit_value_set() instead. */ void devlink_param_value_changed(struct devlink *devlink, u32 param_id) { @@ -4635,11 +6187,38 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id) param_item = devlink_param_find_by_id(&devlink->param_list, param_id); WARN_ON(!param_item); - devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW); + devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW); } EXPORT_SYMBOL_GPL(devlink_param_value_changed); /** + * devlink_port_param_value_changed - notify devlink on a parameter's value + * change. Should be called by the driver + * right after the change. + * + * @devlink_port: devlink_port + * @param_id: parameter ID + * + * This function should be used by the driver to notify devlink on value + * change, excluding driverinit configuration mode. + * For driverinit configuration mode driver should use the function + * devlink_port_param_driverinit_value_set() instead. + */ +void devlink_port_param_value_changed(struct devlink_port *devlink_port, + u32 param_id) +{ + struct devlink_param_item *param_item; + + param_item = devlink_param_find_by_id(&devlink_port->param_list, + param_id); + WARN_ON(!param_item); + + devlink_param_notify(devlink_port->devlink, devlink_port->index, + param_item, DEVLINK_CMD_PORT_PARAM_NEW); +} +EXPORT_SYMBOL_GPL(devlink_port_param_value_changed); + +/** * devlink_param_value_str_fill - Safely fill-up the string preventing * from overflow of the preallocated buffer * @@ -4808,6 +6387,99 @@ unlock: } EXPORT_SYMBOL_GPL(devlink_region_snapshot_create); +static void __devlink_compat_running_version(struct devlink *devlink, + char *buf, size_t len) +{ + const struct nlattr *nlattr; + struct devlink_info_req req; + struct sk_buff *msg; + int rem, err; + + if (!devlink->ops->info_get) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + req.msg = msg; + err = devlink->ops->info_get(devlink, &req, NULL); + if (err) + goto free_msg; + + nla_for_each_attr(nlattr, (void *)msg->data, msg->len, rem) { + const struct nlattr *kv; + int rem_kv; + + if (nla_type(nlattr) != DEVLINK_ATTR_INFO_VERSION_RUNNING) + continue; + + nla_for_each_nested(kv, nlattr, rem_kv) { + if (nla_type(kv) != DEVLINK_ATTR_INFO_VERSION_VALUE) + continue; + + strlcat(buf, nla_data(kv), len); + strlcat(buf, " ", len); + } + } +free_msg: + nlmsg_free(msg); +} + +void devlink_compat_running_version(struct net_device *dev, + char *buf, size_t len) +{ + struct devlink_port *devlink_port; + struct devlink *devlink; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + mutex_lock(&devlink->lock); + list_for_each_entry(devlink_port, &devlink->port_list, list) { + if (devlink_port->type == DEVLINK_PORT_TYPE_ETH && + devlink_port->type_dev == dev) { + __devlink_compat_running_version(devlink, + buf, len); + mutex_unlock(&devlink->lock); + goto out; + } + } + mutex_unlock(&devlink->lock); + } +out: + mutex_unlock(&devlink_mutex); +} + +int devlink_compat_flash_update(struct net_device *dev, const char *file_name) +{ + struct devlink_port *devlink_port; + struct devlink *devlink; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + mutex_lock(&devlink->lock); + list_for_each_entry(devlink_port, &devlink->port_list, list) { + int ret = -EOPNOTSUPP; + + if (devlink_port->type != DEVLINK_PORT_TYPE_ETH || + devlink_port->type_dev != dev) + continue; + + mutex_unlock(&devlink_mutex); + if (devlink->ops->flash_update) + ret = devlink->ops->flash_update(devlink, + file_name, + NULL, NULL); + mutex_unlock(&devlink->lock); + return ret; + } + mutex_unlock(&devlink->lock); + } + mutex_unlock(&devlink_mutex); + + return -EOPNOTSUPP; +} + static int __init devlink_module_init(void) { return genl_register_family(&devlink_nl_family); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 158264f7cfaf..1320e8dce559 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -27,7 +27,9 @@ #include <linux/rtnetlink.h> #include <linux/sched/signal.h> #include <linux/net.h> +#include <net/devlink.h> #include <net/xdp_sock.h> +#include <net/flow_offload.h> /* * Some useful ethtool_ops methods that're device independent. @@ -803,6 +805,12 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, if (ops->get_eeprom_len) info.eedump_len = ops->get_eeprom_len(dev); + rtnl_unlock(); + if (!info.fw_version[0]) + devlink_compat_running_version(dev, info.fw_version, + sizeof(info.fw_version)); + rtnl_lock(); + if (copy_to_user(useraddr, &info, sizeof(info))) return -EFAULT; return 0; @@ -1348,12 +1356,9 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) if (regs.len > reglen) regs.len = reglen; - regbuf = NULL; - if (reglen) { - regbuf = vzalloc(reglen); - if (!regbuf) - return -ENOMEM; - } + regbuf = vzalloc(reglen); + if (!regbuf) + return -ENOMEM; ops->get_regs(dev, ®s, regbuf); @@ -2033,11 +2038,17 @@ static noinline_for_stack int ethtool_flash_device(struct net_device *dev, if (copy_from_user(&efl, useraddr, sizeof(efl))) return -EFAULT; + efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0; - if (!dev->ethtool_ops->flash_device) - return -EOPNOTSUPP; + if (!dev->ethtool_ops->flash_device) { + int ret; - efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0; + rtnl_unlock(); + ret = devlink_compat_flash_update(dev, efl.data); + rtnl_lock(); + + return ret; + } return dev->ethtool_ops->flash_device(dev, &efl); } @@ -2816,3 +2827,241 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) return rc; } + +struct ethtool_rx_flow_key { + struct flow_dissector_key_basic basic; + union { + struct flow_dissector_key_ipv4_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; + }; + struct flow_dissector_key_ports tp; + struct flow_dissector_key_ip ip; + struct flow_dissector_key_vlan vlan; + struct flow_dissector_key_eth_addrs eth_addrs; +} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ + +struct ethtool_rx_flow_match { + struct flow_dissector dissector; + struct ethtool_rx_flow_key key; + struct ethtool_rx_flow_key mask; +}; + +struct ethtool_rx_flow_rule * +ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input) +{ + const struct ethtool_rx_flow_spec *fs = input->fs; + static struct in6_addr zero_addr = {}; + struct ethtool_rx_flow_match *match; + struct ethtool_rx_flow_rule *flow; + struct flow_action_entry *act; + + flow = kzalloc(sizeof(struct ethtool_rx_flow_rule) + + sizeof(struct ethtool_rx_flow_match), GFP_KERNEL); + if (!flow) + return ERR_PTR(-ENOMEM); + + /* ethtool_rx supports only one single action per rule. */ + flow->rule = flow_rule_alloc(1); + if (!flow->rule) { + kfree(flow); + return ERR_PTR(-ENOMEM); + } + + match = (struct ethtool_rx_flow_match *)flow->priv; + flow->rule->match.dissector = &match->dissector; + flow->rule->match.mask = &match->mask; + flow->rule->match.key = &match->key; + + match->mask.basic.n_proto = htons(0xffff); + + switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: { + const struct ethtool_tcpip4_spec *v4_spec, *v4_m_spec; + + match->key.basic.n_proto = htons(ETH_P_IP); + + v4_spec = &fs->h_u.tcp_ip4_spec; + v4_m_spec = &fs->m_u.tcp_ip4_spec; + + if (v4_m_spec->ip4src) { + match->key.ipv4.src = v4_spec->ip4src; + match->mask.ipv4.src = v4_m_spec->ip4src; + } + if (v4_m_spec->ip4dst) { + match->key.ipv4.dst = v4_spec->ip4dst; + match->mask.ipv4.dst = v4_m_spec->ip4dst; + } + if (v4_m_spec->ip4src || + v4_m_spec->ip4dst) { + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS); + match->dissector.offset[FLOW_DISSECTOR_KEY_IPV4_ADDRS] = + offsetof(struct ethtool_rx_flow_key, ipv4); + } + if (v4_m_spec->psrc) { + match->key.tp.src = v4_spec->psrc; + match->mask.tp.src = v4_m_spec->psrc; + } + if (v4_m_spec->pdst) { + match->key.tp.dst = v4_spec->pdst; + match->mask.tp.dst = v4_m_spec->pdst; + } + if (v4_m_spec->psrc || + v4_m_spec->pdst) { + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_PORTS); + match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] = + offsetof(struct ethtool_rx_flow_key, tp); + } + if (v4_m_spec->tos) { + match->key.ip.tos = v4_spec->tos; + match->mask.ip.tos = v4_m_spec->tos; + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_IP); + match->dissector.offset[FLOW_DISSECTOR_KEY_IP] = + offsetof(struct ethtool_rx_flow_key, ip); + } + } + break; + case TCP_V6_FLOW: + case UDP_V6_FLOW: { + const struct ethtool_tcpip6_spec *v6_spec, *v6_m_spec; + + match->key.basic.n_proto = htons(ETH_P_IPV6); + + v6_spec = &fs->h_u.tcp_ip6_spec; + v6_m_spec = &fs->m_u.tcp_ip6_spec; + if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) { + memcpy(&match->key.ipv6.src, v6_spec->ip6src, + sizeof(match->key.ipv6.src)); + memcpy(&match->mask.ipv6.src, v6_m_spec->ip6src, + sizeof(match->mask.ipv6.src)); + } + if (memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) { + memcpy(&match->key.ipv6.dst, v6_spec->ip6dst, + sizeof(match->key.ipv6.dst)); + memcpy(&match->mask.ipv6.dst, v6_m_spec->ip6dst, + sizeof(match->mask.ipv6.dst)); + } + if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr)) || + memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) { + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS); + match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] = + offsetof(struct ethtool_rx_flow_key, ipv6); + } + if (v6_m_spec->psrc) { + match->key.tp.src = v6_spec->psrc; + match->mask.tp.src = v6_m_spec->psrc; + } + if (v6_m_spec->pdst) { + match->key.tp.dst = v6_spec->pdst; + match->mask.tp.dst = v6_m_spec->pdst; + } + if (v6_m_spec->psrc || + v6_m_spec->pdst) { + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_PORTS); + match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] = + offsetof(struct ethtool_rx_flow_key, tp); + } + if (v6_m_spec->tclass) { + match->key.ip.tos = v6_spec->tclass; + match->mask.ip.tos = v6_m_spec->tclass; + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_IP); + match->dissector.offset[FLOW_DISSECTOR_KEY_IP] = + offsetof(struct ethtool_rx_flow_key, ip); + } + } + break; + default: + ethtool_rx_flow_rule_destroy(flow); + return ERR_PTR(-EINVAL); + } + + switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + match->key.basic.ip_proto = IPPROTO_TCP; + break; + case UDP_V4_FLOW: + case UDP_V6_FLOW: + match->key.basic.ip_proto = IPPROTO_UDP; + break; + } + match->mask.basic.ip_proto = 0xff; + + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_BASIC); + match->dissector.offset[FLOW_DISSECTOR_KEY_BASIC] = + offsetof(struct ethtool_rx_flow_key, basic); + + if (fs->flow_type & FLOW_EXT) { + const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext; + const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext; + + if (ext_m_spec->vlan_etype && + ext_m_spec->vlan_tci) { + match->key.vlan.vlan_tpid = ext_h_spec->vlan_etype; + match->mask.vlan.vlan_tpid = ext_m_spec->vlan_etype; + + match->key.vlan.vlan_id = + ntohs(ext_h_spec->vlan_tci) & 0x0fff; + match->mask.vlan.vlan_id = + ntohs(ext_m_spec->vlan_tci) & 0x0fff; + + match->key.vlan.vlan_priority = + (ntohs(ext_h_spec->vlan_tci) & 0xe000) >> 13; + match->mask.vlan.vlan_priority = + (ntohs(ext_m_spec->vlan_tci) & 0xe000) >> 13; + + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_VLAN); + match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] = + offsetof(struct ethtool_rx_flow_key, vlan); + } + } + if (fs->flow_type & FLOW_MAC_EXT) { + const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext; + const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext; + + memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest, + ETH_ALEN); + memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest, + ETH_ALEN); + + match->dissector.used_keys |= + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS); + match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] = + offsetof(struct ethtool_rx_flow_key, eth_addrs); + } + + act = &flow->rule->action.entries[0]; + switch (fs->ring_cookie) { + case RX_CLS_FLOW_DISC: + act->id = FLOW_ACTION_DROP; + break; + case RX_CLS_FLOW_WAKE: + act->id = FLOW_ACTION_WAKE; + break; + default: + act->id = FLOW_ACTION_QUEUE; + if (fs->flow_type & FLOW_RSS) + act->queue.ctx = input->rss_ctx; + + act->queue.vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); + act->queue.index = ethtool_get_flow_spec_ring(fs->ring_cookie); + break; + } + + return flow; +} +EXPORT_SYMBOL(ethtool_rx_flow_rule_create); + +void ethtool_rx_flow_rule_destroy(struct ethtool_rx_flow_rule *flow) +{ + kfree(flow->rule); + kfree(flow); +} +EXPORT_SYMBOL(ethtool_rx_flow_rule_destroy); diff --git a/net/core/filter.c b/net/core/filter.c index 41984ad4b9b4..b584cb42a803 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -73,6 +73,7 @@ #include <linux/seg6_local.h> #include <net/seg6.h> #include <net/seg6_local.h> +#include <net/lwtunnel.h> /** * sk_filter_trim_cap - run a packet through a socket filter @@ -1793,6 +1794,20 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk) +{ + sk = sk_to_full_sk(sk); + + return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL; +} + +static const struct bpf_func_proto bpf_sk_fullsock_proto = { + .func = bpf_sk_fullsock, + .gpl_only = false, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, +}; + static inline int sk_skb_try_make_writable(struct sk_buff *skb, unsigned int write_len) { @@ -4112,10 +4127,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, /* Only some socketops are supported */ switch (optname) { case SO_RCVBUF: + val = min_t(u32, val, sysctl_rmem_max); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); break; case SO_SNDBUF: + val = min_t(u32, val, sysctl_wmem_max); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); break; @@ -4801,7 +4818,15 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len } #endif /* CONFIG_IPV6_SEG6_BPF */ -BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr, +#if IS_ENABLED(CONFIG_LWTUNNEL_BPF) +static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, + bool ingress) +{ + return bpf_lwt_push_ip_encap(skb, hdr, len, ingress); +} +#endif + +BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr, u32, len) { switch (type) { @@ -4810,13 +4835,40 @@ BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr, case BPF_LWT_ENCAP_SEG6_INLINE: return bpf_push_seg6_encap(skb, type, hdr, len); #endif +#if IS_ENABLED(CONFIG_LWTUNNEL_BPF) + case BPF_LWT_ENCAP_IP: + return bpf_push_ip_encap(skb, hdr, len, true /* ingress */); +#endif default: return -EINVAL; } } -static const struct bpf_func_proto bpf_lwt_push_encap_proto = { - .func = bpf_lwt_push_encap, +BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type, + void *, hdr, u32, len) +{ + switch (type) { +#if IS_ENABLED(CONFIG_LWTUNNEL_BPF) + case BPF_LWT_ENCAP_IP: + return bpf_push_ip_encap(skb, hdr, len, false /* egress */); +#endif + default: + return -EINVAL; + } +} + +static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = { + .func = bpf_lwt_in_push_encap, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_MEM, + .arg4_type = ARG_CONST_SIZE +}; + +static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = { + .func = bpf_lwt_xmit_push_encap, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, @@ -5016,6 +5068,54 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = { }; #endif /* CONFIG_IPV6_SEG6_BPF */ +#define CONVERT_COMMON_TCP_SOCK_FIELDS(md_type, CONVERT) \ +do { \ + switch (si->off) { \ + case offsetof(md_type, snd_cwnd): \ + CONVERT(snd_cwnd); break; \ + case offsetof(md_type, srtt_us): \ + CONVERT(srtt_us); break; \ + case offsetof(md_type, snd_ssthresh): \ + CONVERT(snd_ssthresh); break; \ + case offsetof(md_type, rcv_nxt): \ + CONVERT(rcv_nxt); break; \ + case offsetof(md_type, snd_nxt): \ + CONVERT(snd_nxt); break; \ + case offsetof(md_type, snd_una): \ + CONVERT(snd_una); break; \ + case offsetof(md_type, mss_cache): \ + CONVERT(mss_cache); break; \ + case offsetof(md_type, ecn_flags): \ + CONVERT(ecn_flags); break; \ + case offsetof(md_type, rate_delivered): \ + CONVERT(rate_delivered); break; \ + case offsetof(md_type, rate_interval_us): \ + CONVERT(rate_interval_us); break; \ + case offsetof(md_type, packets_out): \ + CONVERT(packets_out); break; \ + case offsetof(md_type, retrans_out): \ + CONVERT(retrans_out); break; \ + case offsetof(md_type, total_retrans): \ + CONVERT(total_retrans); break; \ + case offsetof(md_type, segs_in): \ + CONVERT(segs_in); break; \ + case offsetof(md_type, data_segs_in): \ + CONVERT(data_segs_in); break; \ + case offsetof(md_type, segs_out): \ + CONVERT(segs_out); break; \ + case offsetof(md_type, data_segs_out): \ + CONVERT(data_segs_out); break; \ + case offsetof(md_type, lost_out): \ + CONVERT(lost_out); break; \ + case offsetof(md_type, sacked_out): \ + CONVERT(sacked_out); break; \ + case offsetof(md_type, bytes_received): \ + CONVERT(bytes_received); break; \ + case offsetof(md_type, bytes_acked): \ + CONVERT(bytes_acked); break; \ + } \ +} while (0) + #ifdef CONFIG_INET static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, int dif, int sdif, u8 family, u8 proto) @@ -5253,6 +5353,79 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { .arg5_type = ARG_ANYTHING, }; +bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked)) + return false; + + if (off % size != 0) + return false; + + switch (off) { + case offsetof(struct bpf_tcp_sock, bytes_received): + case offsetof(struct bpf_tcp_sock, bytes_acked): + return size == sizeof(__u64); + default: + return size == sizeof(__u32); + } +} + +u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, u32 *target_size) +{ + struct bpf_insn *insn = insn_buf; + +#define BPF_TCP_SOCK_GET_COMMON(FIELD) \ + do { \ + BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) > \ + FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\ + si->dst_reg, si->src_reg, \ + offsetof(struct tcp_sock, FIELD)); \ + } while (0) + + CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_tcp_sock, + BPF_TCP_SOCK_GET_COMMON); + + if (insn > insn_buf) + return insn - insn_buf; + + switch (si->off) { + case offsetof(struct bpf_tcp_sock, rtt_min): + BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) != + sizeof(struct minmax)); + BUILD_BUG_ON(sizeof(struct minmax) < + sizeof(struct minmax_sample)); + + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct tcp_sock, rtt_min) + + offsetof(struct minmax_sample, v)); + break; + } + + return insn - insn_buf; +} + +BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) +{ + sk = sk_to_full_sk(sk); + + if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) + return (unsigned long)sk; + + return (unsigned long)NULL; +} + +static const struct bpf_func_proto bpf_tcp_sock_proto = { + .func = bpf_tcp_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, +}; + #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -5282,7 +5455,8 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_lwt_seg6_adjust_srh || func == bpf_lwt_seg6_action || #endif - func == bpf_lwt_push_encap) + func == bpf_lwt_in_push_encap || + func == bpf_lwt_xmit_push_encap) return true; return false; @@ -5314,10 +5488,20 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_tail_call_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; + default: + break; + } + + if (!capable(CAP_SYS_ADMIN)) + return NULL; + + switch (func_id) { + case BPF_FUNC_spin_lock: + return &bpf_spin_lock_proto; + case BPF_FUNC_spin_unlock: + return &bpf_spin_unlock_proto; case BPF_FUNC_trace_printk: - if (capable(CAP_SYS_ADMIN)) - return bpf_get_trace_printk_proto(); - /* else, fall through */ + return bpf_get_trace_printk_proto(); default: return NULL; } @@ -5396,6 +5580,12 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) switch (func_id) { case BPF_FUNC_get_local_storage: return &bpf_get_local_storage_proto; + case BPF_FUNC_sk_fullsock: + return &bpf_sk_fullsock_proto; +#ifdef CONFIG_INET + case BPF_FUNC_tcp_sock: + return &bpf_tcp_sock_proto; +#endif default: return sk_filter_func_proto(func_id, prog); } @@ -5467,6 +5657,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_socket_uid_proto; case BPF_FUNC_fib_lookup: return &bpf_skb_fib_lookup_proto; + case BPF_FUNC_sk_fullsock: + return &bpf_sk_fullsock_proto; #ifdef CONFIG_XFRM case BPF_FUNC_skb_get_xfrm_state: return &bpf_skb_get_xfrm_state_proto; @@ -5484,6 +5676,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; + case BPF_FUNC_tcp_sock: + return &bpf_tcp_sock_proto; #endif default: return bpf_base_func_proto(func_id); @@ -5660,7 +5854,7 @@ lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_lwt_push_encap: - return &bpf_lwt_push_encap_proto; + return &bpf_lwt_in_push_encap_proto; default: return lwt_out_func_proto(func_id, prog); } @@ -5696,6 +5890,8 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_l4_csum_replace_proto; case BPF_FUNC_set_hash_invalid: return &bpf_set_hash_invalid_proto; + case BPF_FUNC_lwt_push_encap: + return &bpf_lwt_xmit_push_encap_proto; default: return lwt_out_func_proto(func_id, prog); } @@ -5754,6 +5950,11 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type if (size != sizeof(__u64)) return false; break; + case offsetof(struct __sk_buff, sk): + if (type == BPF_WRITE || size != sizeof(__u64)) + return false; + info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL; + break; default: /* Only narrow read access allowed for now. */ if (type == BPF_WRITE) { @@ -5925,31 +6126,44 @@ full_access: return true; } -static bool __sock_filter_check_size(int off, int size, +bool bpf_sock_common_is_valid_access(int off, int size, + enum bpf_access_type type, struct bpf_insn_access_aux *info) { - const int size_default = sizeof(__u32); - switch (off) { - case bpf_ctx_range(struct bpf_sock, src_ip4): - case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): - bpf_ctx_record_field_size(info, size_default); - return bpf_ctx_narrow_access_ok(off, size, size_default); + case bpf_ctx_range_till(struct bpf_sock, type, priority): + return false; + default: + return bpf_sock_is_valid_access(off, size, type, info); } - - return size == size_default; } bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { + const int size_default = sizeof(__u32); + if (off < 0 || off >= sizeof(struct bpf_sock)) return false; if (off % size != 0) return false; - if (!__sock_filter_check_size(off, size, info)) - return false; - return true; + + switch (off) { + case offsetof(struct bpf_sock, state): + case offsetof(struct bpf_sock, family): + case offsetof(struct bpf_sock, type): + case offsetof(struct bpf_sock, protocol): + case offsetof(struct bpf_sock, dst_port): + case offsetof(struct bpf_sock, src_port): + case bpf_ctx_range(struct bpf_sock, src_ip4): + case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): + case bpf_ctx_range(struct bpf_sock, dst_ip4): + case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): + bpf_ctx_record_field_size(info, size_default); + return bpf_ctx_narrow_access_ok(off, size, size_default); + } + + return size == size_default; } static bool sock_filter_is_valid_access(int off, int size, @@ -6738,6 +6952,13 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, off += offsetof(struct qdisc_skb_cb, pkt_len); *target_size = 4; *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off); + break; + + case offsetof(struct __sk_buff, sk): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, sk)); + break; } return insn - insn_buf; @@ -6786,24 +7007,32 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct bpf_sock, family): - BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2); - - *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg, - offsetof(struct sock, sk_family)); + *insn++ = BPF_LDX_MEM( + BPF_FIELD_SIZEOF(struct sock_common, skc_family), + si->dst_reg, si->src_reg, + bpf_target_off(struct sock_common, + skc_family, + FIELD_SIZEOF(struct sock_common, + skc_family), + target_size)); break; case offsetof(struct bpf_sock, type): + BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, __sk_flags_offset)); *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK); *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT); + *target_size = 2; break; case offsetof(struct bpf_sock, protocol): + BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE); *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, offsetof(struct sock, __sk_flags_offset)); *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT); + *target_size = 1; break; case offsetof(struct bpf_sock, src_ip4): @@ -6815,6 +7044,15 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, target_size)); break; + case offsetof(struct bpf_sock, dst_ip4): + *insn++ = BPF_LDX_MEM( + BPF_SIZE(si->code), si->dst_reg, si->src_reg, + bpf_target_off(struct sock_common, skc_daddr, + FIELD_SIZEOF(struct sock_common, + skc_daddr), + target_size)); + break; + case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): #if IS_ENABLED(CONFIG_IPV6) off = si->off; @@ -6833,6 +7071,23 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, #endif break; + case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): +#if IS_ENABLED(CONFIG_IPV6) + off = si->off; + off -= offsetof(struct bpf_sock, dst_ip6[0]); + *insn++ = BPF_LDX_MEM( + BPF_SIZE(si->code), si->dst_reg, si->src_reg, + bpf_target_off(struct sock_common, + skc_v6_daddr.s6_addr32[0], + FIELD_SIZEOF(struct sock_common, + skc_v6_daddr.s6_addr32[0]), + target_size) + off); +#else + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); + *target_size = 4; +#endif + break; + case offsetof(struct bpf_sock, src_port): *insn++ = BPF_LDX_MEM( BPF_FIELD_SIZEOF(struct sock_common, skc_num), @@ -6842,6 +7097,26 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, skc_num), target_size)); break; + + case offsetof(struct bpf_sock, dst_port): + *insn++ = BPF_LDX_MEM( + BPF_FIELD_SIZEOF(struct sock_common, skc_dport), + si->dst_reg, si->src_reg, + bpf_target_off(struct sock_common, skc_dport, + FIELD_SIZEOF(struct sock_common, + skc_dport), + target_size)); + break; + + case offsetof(struct bpf_sock, state): + *insn++ = BPF_LDX_MEM( + BPF_FIELD_SIZEOF(struct sock_common, skc_state), + si->dst_reg, si->src_reg, + bpf_target_off(struct sock_common, skc_state, + FIELD_SIZEOF(struct sock_common, + skc_state), + target_size)); + break; } return insn - insn_buf; @@ -7089,6 +7364,85 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, struct bpf_insn *insn = insn_buf; int off; +/* Helper macro for adding read access to tcp_sock or sock fields. */ +#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ + do { \ + BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ + FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, \ + is_fullsock), \ + si->dst_reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + is_fullsock)); \ + *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, sk),\ + si->dst_reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, sk));\ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \ + OBJ_FIELD), \ + si->dst_reg, si->dst_reg, \ + offsetof(OBJ, OBJ_FIELD)); \ + } while (0) + +#define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \ + SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock) + +/* Helper macro for adding write access to tcp_sock or sock fields. + * The macro is called with two registers, dst_reg which contains a pointer + * to ctx (context) and src_reg which contains the value that should be + * stored. However, we need an additional register since we cannot overwrite + * dst_reg because it may be used later in the program. + * Instead we "borrow" one of the other register. We first save its value + * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore + * it at the end of the macro. + */ +#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ + do { \ + int reg = BPF_REG_9; \ + BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ + FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, \ + is_fullsock), \ + reg, si->dst_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + is_fullsock)); \ + *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, sk),\ + reg, si->dst_reg, \ + offsetof(struct bpf_sock_ops_kern, sk));\ + *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \ + reg, si->src_reg, \ + offsetof(OBJ, OBJ_FIELD)); \ + *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + } while (0) + +#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \ + do { \ + if (TYPE == BPF_WRITE) \ + SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ + else \ + SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ + } while (0) + + CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_sock_ops, + SOCK_OPS_GET_TCP_SOCK_FIELD); + + if (insn > insn_buf) + return insn - insn_buf; + switch (si->off) { case offsetof(struct bpf_sock_ops, op) ... offsetof(struct bpf_sock_ops, replylong[3]): @@ -7246,175 +7600,15 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, FIELD_SIZEOF(struct minmax_sample, t)); break; -/* Helper macro for adding read access to tcp_sock or sock fields. */ -#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ - do { \ - BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ - FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ - struct bpf_sock_ops_kern, \ - is_fullsock), \ - si->dst_reg, si->src_reg, \ - offsetof(struct bpf_sock_ops_kern, \ - is_fullsock)); \ - *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \ - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ - struct bpf_sock_ops_kern, sk),\ - si->dst_reg, si->src_reg, \ - offsetof(struct bpf_sock_ops_kern, sk));\ - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \ - OBJ_FIELD), \ - si->dst_reg, si->dst_reg, \ - offsetof(OBJ, OBJ_FIELD)); \ - } while (0) - -/* Helper macro for adding write access to tcp_sock or sock fields. - * The macro is called with two registers, dst_reg which contains a pointer - * to ctx (context) and src_reg which contains the value that should be - * stored. However, we need an additional register since we cannot overwrite - * dst_reg because it may be used later in the program. - * Instead we "borrow" one of the other register. We first save its value - * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore - * it at the end of the macro. - */ -#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ - do { \ - int reg = BPF_REG_9; \ - BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \ - FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \ - if (si->dst_reg == reg || si->src_reg == reg) \ - reg--; \ - if (si->dst_reg == reg || si->src_reg == reg) \ - reg--; \ - *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \ - offsetof(struct bpf_sock_ops_kern, \ - temp)); \ - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ - struct bpf_sock_ops_kern, \ - is_fullsock), \ - reg, si->dst_reg, \ - offsetof(struct bpf_sock_ops_kern, \ - is_fullsock)); \ - *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \ - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ - struct bpf_sock_ops_kern, sk),\ - reg, si->dst_reg, \ - offsetof(struct bpf_sock_ops_kern, sk));\ - *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \ - reg, si->src_reg, \ - offsetof(OBJ, OBJ_FIELD)); \ - *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \ - offsetof(struct bpf_sock_ops_kern, \ - temp)); \ - } while (0) - -#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \ - do { \ - if (TYPE == BPF_WRITE) \ - SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ - else \ - SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \ - } while (0) - - case offsetof(struct bpf_sock_ops, snd_cwnd): - SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, srtt_us): - SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock); - break; - case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags): SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags, struct tcp_sock); break; - case offsetof(struct bpf_sock_ops, snd_ssthresh): - SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, rcv_nxt): - SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, snd_nxt): - SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, snd_una): - SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, mss_cache): - SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, ecn_flags): - SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, rate_delivered): - SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered, - struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, rate_interval_us): - SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us, - struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, packets_out): - SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, retrans_out): - SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, total_retrans): - SOCK_OPS_GET_FIELD(total_retrans, total_retrans, - struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, segs_in): - SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, data_segs_in): - SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, segs_out): - SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, data_segs_out): - SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out, - struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, lost_out): - SOCK_OPS_GET_FIELD(lost_out, lost_out, struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, sacked_out): - SOCK_OPS_GET_FIELD(sacked_out, sacked_out, struct tcp_sock); - break; - case offsetof(struct bpf_sock_ops, sk_txhash): SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash, struct sock, type); break; - - case offsetof(struct bpf_sock_ops, bytes_received): - SOCK_OPS_GET_FIELD(bytes_received, bytes_received, - struct tcp_sock); - break; - - case offsetof(struct bpf_sock_ops, bytes_acked): - SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock); - break; - } return insn - insn_buf; } diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c new file mode 100644 index 000000000000..c3a00eac4804 --- /dev/null +++ b/net/core/flow_offload.c @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/kernel.h> +#include <linux/slab.h> +#include <net/flow_offload.h> + +struct flow_rule *flow_rule_alloc(unsigned int num_actions) +{ + struct flow_rule *rule; + + rule = kzalloc(sizeof(struct flow_rule) + + sizeof(struct flow_action_entry) * num_actions, + GFP_KERNEL); + if (!rule) + return NULL; + + rule->action.num_entries = num_actions; + + return rule; +} +EXPORT_SYMBOL(flow_rule_alloc); + +#define FLOW_DISSECTOR_MATCH(__rule, __type, __out) \ + const struct flow_match *__m = &(__rule)->match; \ + struct flow_dissector *__d = (__m)->dissector; \ + \ + (__out)->key = skb_flow_dissector_target(__d, __type, (__m)->key); \ + (__out)->mask = skb_flow_dissector_target(__d, __type, (__m)->mask); \ + +void flow_rule_match_basic(const struct flow_rule *rule, + struct flow_match_basic *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_BASIC, out); +} +EXPORT_SYMBOL(flow_rule_match_basic); + +void flow_rule_match_control(const struct flow_rule *rule, + struct flow_match_control *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_CONTROL, out); +} +EXPORT_SYMBOL(flow_rule_match_control); + +void flow_rule_match_eth_addrs(const struct flow_rule *rule, + struct flow_match_eth_addrs *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS, out); +} +EXPORT_SYMBOL(flow_rule_match_eth_addrs); + +void flow_rule_match_vlan(const struct flow_rule *rule, + struct flow_match_vlan *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_VLAN, out); +} +EXPORT_SYMBOL(flow_rule_match_vlan); + +void flow_rule_match_ipv4_addrs(const struct flow_rule *rule, + struct flow_match_ipv4_addrs *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS, out); +} +EXPORT_SYMBOL(flow_rule_match_ipv4_addrs); + +void flow_rule_match_ipv6_addrs(const struct flow_rule *rule, + struct flow_match_ipv6_addrs *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS, out); +} +EXPORT_SYMBOL(flow_rule_match_ipv6_addrs); + +void flow_rule_match_ip(const struct flow_rule *rule, + struct flow_match_ip *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IP, out); +} +EXPORT_SYMBOL(flow_rule_match_ip); + +void flow_rule_match_ports(const struct flow_rule *rule, + struct flow_match_ports *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_PORTS, out); +} +EXPORT_SYMBOL(flow_rule_match_ports); + +void flow_rule_match_tcp(const struct flow_rule *rule, + struct flow_match_tcp *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_TCP, out); +} +EXPORT_SYMBOL(flow_rule_match_tcp); + +void flow_rule_match_icmp(const struct flow_rule *rule, + struct flow_match_icmp *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ICMP, out); +} +EXPORT_SYMBOL(flow_rule_match_icmp); + +void flow_rule_match_mpls(const struct flow_rule *rule, + struct flow_match_mpls *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_MPLS, out); +} +EXPORT_SYMBOL(flow_rule_match_mpls); + +void flow_rule_match_enc_control(const struct flow_rule *rule, + struct flow_match_control *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL, out); +} +EXPORT_SYMBOL(flow_rule_match_enc_control); + +void flow_rule_match_enc_ipv4_addrs(const struct flow_rule *rule, + struct flow_match_ipv4_addrs *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, out); +} +EXPORT_SYMBOL(flow_rule_match_enc_ipv4_addrs); + +void flow_rule_match_enc_ipv6_addrs(const struct flow_rule *rule, + struct flow_match_ipv6_addrs *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, out); +} +EXPORT_SYMBOL(flow_rule_match_enc_ipv6_addrs); + +void flow_rule_match_enc_ip(const struct flow_rule *rule, + struct flow_match_ip *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IP, out); +} +EXPORT_SYMBOL(flow_rule_match_enc_ip); + +void flow_rule_match_enc_ports(const struct flow_rule *rule, + struct flow_match_ports *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_PORTS, out); +} +EXPORT_SYMBOL(flow_rule_match_enc_ports); + +void flow_rule_match_enc_keyid(const struct flow_rule *rule, + struct flow_match_enc_keyid *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, out); +} +EXPORT_SYMBOL(flow_rule_match_enc_keyid); + +void flow_rule_match_enc_opts(const struct flow_rule *rule, + struct flow_match_enc_opts *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_OPTS, out); +} +EXPORT_SYMBOL(flow_rule_match_enc_opts); diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index a648568c5e8f..a5c8c79d468a 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -16,6 +16,8 @@ #include <linux/types.h> #include <linux/bpf.h> #include <net/lwtunnel.h> +#include <net/gre.h> +#include <net/ip6_route.h> struct bpf_lwt_prog { struct bpf_prog *prog; @@ -55,6 +57,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, switch (ret) { case BPF_OK: + case BPF_LWT_REROUTE: break; case BPF_REDIRECT: @@ -87,6 +90,30 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, return ret; } +static int bpf_lwt_input_reroute(struct sk_buff *skb) +{ + int err = -EINVAL; + + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *iph = ip_hdr(skb); + + err = ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb_dst(skb)->dev); + } else if (skb->protocol == htons(ETH_P_IPV6)) { + err = ipv6_stub->ipv6_route_input(skb); + } else { + err = -EAFNOSUPPORT; + } + + if (err) + goto err; + return dst_input(skb); + +err: + kfree_skb(skb); + return err; +} + static int bpf_input(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -98,11 +125,11 @@ static int bpf_input(struct sk_buff *skb) ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT); if (ret < 0) return ret; + if (ret == BPF_LWT_REROUTE) + return bpf_lwt_input_reroute(skb); } if (unlikely(!dst->lwtstate->orig_input)) { - pr_warn_once("orig_input not set on dst for prog %s\n", - bpf->out.name); kfree_skb(skb); return -EINVAL; } @@ -147,6 +174,102 @@ static int xmit_check_hhlen(struct sk_buff *skb) return 0; } +static int bpf_lwt_xmit_reroute(struct sk_buff *skb) +{ + struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev); + int oif = l3mdev ? l3mdev->ifindex : 0; + struct dst_entry *dst = NULL; + int err = -EAFNOSUPPORT; + struct sock *sk; + struct net *net; + bool ipv4; + + if (skb->protocol == htons(ETH_P_IP)) + ipv4 = true; + else if (skb->protocol == htons(ETH_P_IPV6)) + ipv4 = false; + else + goto err; + + sk = sk_to_full_sk(skb->sk); + if (sk) { + if (sk->sk_bound_dev_if) + oif = sk->sk_bound_dev_if; + net = sock_net(sk); + } else { + net = dev_net(skb_dst(skb)->dev); + } + + if (ipv4) { + struct iphdr *iph = ip_hdr(skb); + struct flowi4 fl4 = {}; + struct rtable *rt; + + fl4.flowi4_oif = oif; + fl4.flowi4_mark = skb->mark; + fl4.flowi4_uid = sock_net_uid(net, sk); + fl4.flowi4_tos = RT_TOS(iph->tos); + fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; + fl4.flowi4_proto = iph->protocol; + fl4.daddr = iph->daddr; + fl4.saddr = iph->saddr; + + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + goto err; + } + dst = &rt->dst; + } else { + struct ipv6hdr *iph6 = ipv6_hdr(skb); + struct flowi6 fl6 = {}; + + fl6.flowi6_oif = oif; + fl6.flowi6_mark = skb->mark; + fl6.flowi6_uid = sock_net_uid(net, sk); + fl6.flowlabel = ip6_flowinfo(iph6); + fl6.flowi6_proto = iph6->nexthdr; + fl6.daddr = iph6->daddr; + fl6.saddr = iph6->saddr; + + err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6); + if (unlikely(err)) + goto err; + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto err; + } + } + if (unlikely(dst->error)) { + err = dst->error; + dst_release(dst); + goto err; + } + + /* Although skb header was reserved in bpf_lwt_push_ip_encap(), it + * was done for the previous dst, so we are doing it here again, in + * case the new dst needs much more space. The call below is a noop + * if there is enough header space in skb. + */ + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + if (unlikely(err)) + goto err; + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb); + if (unlikely(err)) + goto err; + + /* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */ + return LWTUNNEL_XMIT_DONE; + +err: + kfree_skb(skb); + return err; +} + static int bpf_xmit(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -154,11 +277,20 @@ static int bpf_xmit(struct sk_buff *skb) bpf = bpf_lwt_lwtunnel(dst->lwtstate); if (bpf->xmit.prog) { + __be16 proto = skb->protocol; int ret; ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT); switch (ret) { case BPF_OK: + /* If the header changed, e.g. via bpf_lwt_push_encap, + * BPF_LWT_REROUTE below should have been used if the + * protocol was also changed. + */ + if (skb->protocol != proto) { + kfree_skb(skb); + return -EINVAL; + } /* If the header was expanded, headroom might be too * small for L2 header to come, expand as needed. */ @@ -169,6 +301,8 @@ static int bpf_xmit(struct sk_buff *skb) return LWTUNNEL_XMIT_CONTINUE; case BPF_REDIRECT: return LWTUNNEL_XMIT_DONE; + case BPF_LWT_REROUTE: + return bpf_lwt_xmit_reroute(skb); default: return ret; } @@ -390,6 +524,133 @@ static const struct lwtunnel_encap_ops bpf_encap_ops = { .owner = THIS_MODULE, }; +static int handle_gso_type(struct sk_buff *skb, unsigned int gso_type, + int encap_len) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + + gso_type |= SKB_GSO_DODGY; + shinfo->gso_type |= gso_type; + skb_decrease_gso_size(shinfo, encap_len); + shinfo->gso_segs = 0; + return 0; +} + +static int handle_gso_encap(struct sk_buff *skb, bool ipv4, int encap_len) +{ + int next_hdr_offset; + void *next_hdr; + __u8 protocol; + + /* SCTP and UDP_L4 gso need more nuanced handling than what + * handle_gso_type() does above: skb_decrease_gso_size() is not enough. + * So at the moment only TCP GSO packets are let through. + */ + if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + return -ENOTSUPP; + + if (ipv4) { + protocol = ip_hdr(skb)->protocol; + next_hdr_offset = sizeof(struct iphdr); + next_hdr = skb_network_header(skb) + next_hdr_offset; + } else { + protocol = ipv6_hdr(skb)->nexthdr; + next_hdr_offset = sizeof(struct ipv6hdr); + next_hdr = skb_network_header(skb) + next_hdr_offset; + } + + switch (protocol) { + case IPPROTO_GRE: + next_hdr_offset += sizeof(struct gre_base_hdr); + if (next_hdr_offset > encap_len) + return -EINVAL; + + if (((struct gre_base_hdr *)next_hdr)->flags & GRE_CSUM) + return handle_gso_type(skb, SKB_GSO_GRE_CSUM, + encap_len); + return handle_gso_type(skb, SKB_GSO_GRE, encap_len); + + case IPPROTO_UDP: + next_hdr_offset += sizeof(struct udphdr); + if (next_hdr_offset > encap_len) + return -EINVAL; + + if (((struct udphdr *)next_hdr)->check) + return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL_CSUM, + encap_len); + return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL, encap_len); + + case IPPROTO_IP: + case IPPROTO_IPV6: + if (ipv4) + return handle_gso_type(skb, SKB_GSO_IPXIP4, encap_len); + else + return handle_gso_type(skb, SKB_GSO_IPXIP6, encap_len); + + default: + return -EPROTONOSUPPORT; + } +} + +int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress) +{ + struct iphdr *iph; + bool ipv4; + int err; + + if (unlikely(len < sizeof(struct iphdr) || len > LWT_BPF_MAX_HEADROOM)) + return -EINVAL; + + /* validate protocol and length */ + iph = (struct iphdr *)hdr; + if (iph->version == 4) { + ipv4 = true; + if (unlikely(len < iph->ihl * 4)) + return -EINVAL; + } else if (iph->version == 6) { + ipv4 = false; + if (unlikely(len < sizeof(struct ipv6hdr))) + return -EINVAL; + } else { + return -EINVAL; + } + + if (ingress) + err = skb_cow_head(skb, len + skb->mac_len); + else + err = skb_cow_head(skb, + len + LL_RESERVED_SPACE(skb_dst(skb)->dev)); + if (unlikely(err)) + return err; + + /* push the encap headers and fix pointers */ + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + skb_push(skb, len); + if (ingress) + skb_postpush_rcsum(skb, iph, len); + skb_reset_network_header(skb); + memcpy(skb_network_header(skb), hdr, len); + bpf_compute_data_pointers(skb); + skb_clear_hash(skb); + + if (ipv4) { + skb->protocol = htons(ETH_P_IP); + iph = ip_hdr(skb); + + if (!iph->check) + iph->check = ip_fast_csum((unsigned char *)iph, + iph->ihl); + } else { + skb->protocol = htons(ETH_P_IPV6); + } + + if (skb_is_gso(skb)) + return handle_gso_encap(skb, ipv4, len); + + return 0; +} + static int __init bpf_lwt_init(void) { return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 4230400b9a30..30f6fd8f68e0 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -42,6 +42,8 @@ #include <linux/inetdevice.h> #include <net/addrconf.h> +#include <trace/events/neigh.h> + #define DEBUG #define NEIGH_DEBUG 1 #define neigh_dbg(level, fmt, ...) \ @@ -102,6 +104,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh) if (neigh->parms->neigh_cleanup) neigh->parms->neigh_cleanup(neigh); + trace_neigh_cleanup_and_release(neigh, 0); __neigh_notify(neigh, RTM_DELNEIGH, 0, 0); call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); neigh_release(neigh); @@ -1095,6 +1098,8 @@ out: if (notify) neigh_update_notify(neigh, 0); + trace_neigh_timer_handler(neigh, 0); + neigh_release(neigh); } @@ -1165,6 +1170,7 @@ out_unlock_bh: else write_unlock(&neigh->lock); local_bh_enable(); + trace_neigh_event_send_done(neigh, rc); return rc; out_dead: @@ -1172,6 +1178,7 @@ out_dead: goto out_unlock_bh; write_unlock_bh(&neigh->lock); kfree_skb(skb); + trace_neigh_event_send_dead(neigh, 1); return 1; } EXPORT_SYMBOL(__neigh_event_send); @@ -1227,6 +1234,8 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, struct net_device *dev; int update_isrouter = 0; + trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid); + write_lock_bh(&neigh->lock); dev = neigh->dev; @@ -1393,6 +1402,8 @@ out: if (notify) neigh_update_notify(neigh, nlmsg_pid); + trace_neigh_update_done(neigh, err); + return err; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index ff9fd2bb4ce4..7c5061123ead 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -12,7 +12,6 @@ #include <linux/capability.h> #include <linux/kernel.h> #include <linux/netdevice.h> -#include <net/switchdev.h> #include <linux/if_arp.h> #include <linux/slab.h> #include <linux/sched/signal.h> @@ -501,16 +500,11 @@ static ssize_t phys_switch_id_show(struct device *dev, return restart_syscall(); if (dev_isalive(netdev)) { - struct switchdev_attr attr = { - .orig_dev = netdev, - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - .flags = SWITCHDEV_F_NO_RECURSE, - }; + struct netdev_phys_item_id ppid = { }; - ret = switchdev_port_attr_get(netdev, &attr); + ret = dev_get_port_parent_id(netdev, &ppid, false); if (!ret) - ret = sprintf(buf, "%*phN\n", attr.u.ppid.id_len, - attr.u.ppid.id); + ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id); } rtnl_unlock(); diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 419af6dfe29f..470b179d599e 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -43,6 +43,14 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete); EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update); #endif +#include <trace/events/neigh.h> +EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update); +EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update_done); +EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_timer_handler); +EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_event_send_done); +EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_event_send_dead); +EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_cleanup_and_release); + EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 43a932cb609b..5b2252c6d49b 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -136,17 +136,19 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, if (!(pool->p.flags & PP_FLAG_DMA_MAP)) goto skip_dma_map; - /* Setup DMA mapping: use page->private for DMA-addr + /* Setup DMA mapping: use 'struct page' area for storing DMA-addr + * since dma_addr_t can be either 32 or 64 bits and does not always fit + * into page private data (i.e 32bit cpu with 64bit DMA caps) * This mapping is kept for lifetime of page, until leaving pool. */ - dma = dma_map_page(pool->p.dev, page, 0, - (PAGE_SIZE << pool->p.order), - pool->p.dma_dir); + dma = dma_map_page_attrs(pool->p.dev, page, 0, + (PAGE_SIZE << pool->p.order), + pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(pool->p.dev, dma)) { put_page(page); return NULL; } - set_page_private(page, dma); /* page->private = dma; */ + page->dma_addr = dma; skip_dma_map: /* When page just alloc'ed is should/must have refcnt 1. */ @@ -175,13 +177,17 @@ EXPORT_SYMBOL(page_pool_alloc_pages); static void __page_pool_clean_page(struct page_pool *pool, struct page *page) { + dma_addr_t dma; + if (!(pool->p.flags & PP_FLAG_DMA_MAP)) return; + dma = page->dma_addr; /* DMA unmap */ - dma_unmap_page(pool->p.dev, page_private(page), - PAGE_SIZE << pool->p.order, pool->p.dma_dir); - set_page_private(page, 0); + dma_unmap_page_attrs(pool->p.dev, dma, + PAGE_SIZE << pool->p.order, pool->p.dma_dir, + DMA_ATTR_SKIP_CPU_SYNC); + page->dma_addr = 0; } /* Return a page to the page allocator, cleaning up our state */ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f5a98082ac7a..a51cab95ba64 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -46,7 +46,6 @@ #include <linux/inet.h> #include <linux/netdevice.h> -#include <net/switchdev.h> #include <net/ip.h> #include <net/protocol.h> #include <net/arp.h> @@ -1146,22 +1145,17 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev) static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { + struct netdev_phys_item_id ppid = { }; int err; - struct switchdev_attr attr = { - .orig_dev = dev, - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - .flags = SWITCHDEV_F_NO_RECURSE, - }; - err = switchdev_port_attr_get(dev, &attr); + err = dev_get_port_parent_id(dev, &ppid, false); if (err) { if (err == -EOPNOTSUPP) return 0; return err; } - if (nla_put(skb, IFLA_PHYS_SWITCH_ID, attr.u.ppid.id_len, - attr.u.ppid.id)) + if (nla_put(skb, IFLA_PHYS_SWITCH_ID, ppid.id_len, ppid.id)) return -EMSGSIZE; return 0; diff --git a/net/core/scm.c b/net/core/scm.c index b1ff8a441748..52ef219cf6df 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -29,6 +29,7 @@ #include <linux/pid.h> #include <linux/nsproxy.h> #include <linux/slab.h> +#include <linux/errqueue.h> #include <linux/uaccess.h> @@ -252,6 +253,32 @@ out: } EXPORT_SYMBOL(put_cmsg); +void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss_internal) +{ + struct scm_timestamping64 tss; + int i; + + for (i = 0; i < ARRAY_SIZE(tss.ts); i++) { + tss.ts[i].tv_sec = tss_internal->ts[i].tv_sec; + tss.ts[i].tv_nsec = tss_internal->ts[i].tv_nsec; + } + + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_NEW, sizeof(tss), &tss); +} +EXPORT_SYMBOL(put_cmsg_scm_timestamping64); + +void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_internal *tss_internal) +{ + struct scm_timestamping tss; + int i; + + for (i = 0; i < ARRAY_SIZE(tss.ts); i++) + tss.ts[i] = timespec64_to_timespec(tss_internal->ts[i]); + + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_OLD, sizeof(tss), &tss); +} +EXPORT_SYMBOL(put_cmsg_scm_timestamping); + void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) { struct cmsghdr __user *cm diff --git a/net/core/skmsg.c b/net/core/skmsg.c index e76ed8df9f13..ae6f06e45737 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -554,8 +554,7 @@ static void sk_psock_destroy_deferred(struct work_struct *gc) struct sk_psock *psock = container_of(gc, struct sk_psock, gc); /* No sk_callback_lock since already detached. */ - if (psock->parser.enabled) - strp_done(&psock->parser.strp); + strp_done(&psock->parser.strp); cancel_work_sync(&psock->work); diff --git a/net/core/sock.c b/net/core/sock.c index 900e8a9435f5..f4b8b78535f8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -335,14 +335,68 @@ int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(__sk_backlog_rcv); -static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) +static int sock_get_timeout(long timeo, void *optval, bool old_timeval) { - struct timeval tv; + struct __kernel_sock_timeval tv; + int size; - if (optlen < sizeof(tv)) - return -EINVAL; - if (copy_from_user(&tv, optval, sizeof(tv))) - return -EFAULT; + if (timeo == MAX_SCHEDULE_TIMEOUT) { + tv.tv_sec = 0; + tv.tv_usec = 0; + } else { + tv.tv_sec = timeo / HZ; + tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ; + } + + if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { + struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec }; + *(struct old_timeval32 *)optval = tv32; + return sizeof(tv32); + } + + if (old_timeval) { + struct __kernel_old_timeval old_tv; + old_tv.tv_sec = tv.tv_sec; + old_tv.tv_usec = tv.tv_usec; + *(struct __kernel_old_timeval *)optval = old_tv; + size = sizeof(old_tv); + } else { + *(struct __kernel_sock_timeval *)optval = tv; + size = sizeof(tv); + } + + return size; +} + +static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool old_timeval) +{ + struct __kernel_sock_timeval tv; + + if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) { + struct old_timeval32 tv32; + + if (optlen < sizeof(tv32)) + return -EINVAL; + + if (copy_from_user(&tv32, optval, sizeof(tv32))) + return -EFAULT; + tv.tv_sec = tv32.tv_sec; + tv.tv_usec = tv32.tv_usec; + } else if (old_timeval) { + struct __kernel_old_timeval old_tv; + + if (optlen < sizeof(old_tv)) + return -EINVAL; + if (copy_from_user(&old_tv, optval, sizeof(old_tv))) + return -EFAULT; + tv.tv_sec = old_tv.tv_sec; + tv.tv_usec = old_tv.tv_usec; + } else { + if (optlen < sizeof(tv)) + return -EINVAL; + if (copy_from_user(&tv, optval, sizeof(tv))) + return -EFAULT; + } if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC) return -EDOM; @@ -360,8 +414,8 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) *timeo_p = MAX_SCHEDULE_TIMEOUT; if (tv.tv_sec == 0 && tv.tv_usec == 0) return 0; - if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1)) - *timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC / HZ); + if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) + *timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ); return 0; } @@ -731,6 +785,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname, */ val = min_t(u32, val, sysctl_wmem_max); set_sndbuf: + /* Ensure val * 2 fits into an int, to prevent max_t() + * from treating it as a negative value. + */ + val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); /* Wake up sending tasks if we upped the value. */ @@ -742,6 +800,12 @@ set_sndbuf: ret = -EPERM; break; } + + /* No negative values (to prevent underflow, as val will be + * multiplied by 2). + */ + if (val < 0) + val = 0; goto set_sndbuf; case SO_RCVBUF: @@ -752,6 +816,10 @@ set_sndbuf: */ val = min_t(u32, val, sysctl_rmem_max); set_rcvbuf: + /* Ensure val * 2 fits into an int, to prevent max_t() + * from treating it as a negative value. + */ + val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; /* * We double it on the way in to account for @@ -776,6 +844,12 @@ set_rcvbuf: ret = -EPERM; break; } + + /* No negative values (to prevent underflow, as val will be + * multiplied by 2). + */ + if (val < 0) + val = 0; goto set_rcvbuf; case SO_KEEPALIVE: @@ -833,10 +907,17 @@ set_rcvbuf: clear_bit(SOCK_PASSCRED, &sock->flags); break; - case SO_TIMESTAMP: - case SO_TIMESTAMPNS: + case SO_TIMESTAMP_OLD: + case SO_TIMESTAMP_NEW: + case SO_TIMESTAMPNS_OLD: + case SO_TIMESTAMPNS_NEW: if (valbool) { - if (optname == SO_TIMESTAMP) + if (optname == SO_TIMESTAMP_NEW || optname == SO_TIMESTAMPNS_NEW) + sock_set_flag(sk, SOCK_TSTAMP_NEW); + else + sock_reset_flag(sk, SOCK_TSTAMP_NEW); + + if (optname == SO_TIMESTAMP_OLD || optname == SO_TIMESTAMP_NEW) sock_reset_flag(sk, SOCK_RCVTSTAMPNS); else sock_set_flag(sk, SOCK_RCVTSTAMPNS); @@ -845,10 +926,14 @@ set_rcvbuf: } else { sock_reset_flag(sk, SOCK_RCVTSTAMP); sock_reset_flag(sk, SOCK_RCVTSTAMPNS); + sock_reset_flag(sk, SOCK_TSTAMP_NEW); } break; - case SO_TIMESTAMPING: + case SO_TIMESTAMPING_NEW: + sock_set_flag(sk, SOCK_TSTAMP_NEW); + /* fall through */ + case SO_TIMESTAMPING_OLD: if (val & ~SOF_TIMESTAMPING_MASK) { ret = -EINVAL; break; @@ -879,9 +964,13 @@ set_rcvbuf: if (val & SOF_TIMESTAMPING_RX_SOFTWARE) sock_enable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE); - else + else { + if (optname == SO_TIMESTAMPING_NEW) + sock_reset_flag(sk, SOCK_TSTAMP_NEW); + sock_disable_timestamp(sk, (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)); + } break; case SO_RCVLOWAT: @@ -893,12 +982,14 @@ set_rcvbuf: sk->sk_rcvlowat = val ? : 1; break; - case SO_RCVTIMEO: - ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen); + case SO_RCVTIMEO_OLD: + case SO_RCVTIMEO_NEW: + ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen, optname == SO_RCVTIMEO_OLD); break; - case SO_SNDTIMEO: - ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen); + case SO_SNDTIMEO_OLD: + case SO_SNDTIMEO_NEW: + ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen, optname == SO_SNDTIMEO_OLD); break; case SO_ATTACH_FILTER: @@ -1121,7 +1212,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname, int val; u64 val64; struct linger ling; - struct timeval tm; + struct old_timeval32 tm32; + struct __kernel_old_timeval tm; + struct __kernel_sock_timeval stm; struct sock_txtime txtime; } v; @@ -1208,39 +1301,36 @@ int sock_getsockopt(struct socket *sock, int level, int optname, sock_warn_obsolete_bsdism("getsockopt"); break; - case SO_TIMESTAMP: + case SO_TIMESTAMP_OLD: v.val = sock_flag(sk, SOCK_RCVTSTAMP) && + !sock_flag(sk, SOCK_TSTAMP_NEW) && !sock_flag(sk, SOCK_RCVTSTAMPNS); break; - case SO_TIMESTAMPNS: - v.val = sock_flag(sk, SOCK_RCVTSTAMPNS); + case SO_TIMESTAMPNS_OLD: + v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW); + break; + + case SO_TIMESTAMP_NEW: + v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW); break; - case SO_TIMESTAMPING: + case SO_TIMESTAMPNS_NEW: + v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW); + break; + + case SO_TIMESTAMPING_OLD: v.val = sk->sk_tsflags; break; - case SO_RCVTIMEO: - lv = sizeof(struct timeval); - if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) { - v.tm.tv_sec = 0; - v.tm.tv_usec = 0; - } else { - v.tm.tv_sec = sk->sk_rcvtimeo / HZ; - v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) / HZ; - } + case SO_RCVTIMEO_OLD: + case SO_RCVTIMEO_NEW: + lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname); break; - case SO_SNDTIMEO: - lv = sizeof(struct timeval); - if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) { - v.tm.tv_sec = 0; - v.tm.tv_usec = 0; - } else { - v.tm.tv_sec = sk->sk_sndtimeo / HZ; - v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) / HZ; - } + case SO_SNDTIMEO_OLD: + case SO_SNDTIMEO_NEW: + lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname); break; case SO_RCVLOWAT: @@ -2147,7 +2237,7 @@ int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg, return -EINVAL; sockc->mark = *(u32 *)CMSG_DATA(cmsg); break; - case SO_TIMESTAMPING: + case SO_TIMESTAMPING_OLD: if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) return -EINVAL; @@ -2405,7 +2495,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) } if (sk_has_memory_pressure(sk)) { - int alloc; + u64 alloc; if (!sk_under_memory_pressure(sk)) return 1; diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 6eb837a47b5c..baaaeb2b2c42 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -202,7 +202,7 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { - if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL) + if (!ccid || !ccid->ccid_ops->ccid_hc_tx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len); } @@ -214,7 +214,7 @@ static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { - if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL) + if (!ccid || !ccid->ccid_ops->ccid_hc_rx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len); } diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index d0b3e69c6b39..0962f9201baa 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -56,7 +56,7 @@ #include <net/dn_neigh.h> #include <net/dn_fib.h> -#define DN_IFREQ_SIZE (sizeof(struct ifreq) - sizeof(struct sockaddr) + sizeof(struct sockaddr_dn)) +#define DN_IFREQ_SIZE (offsetof(struct ifreq, ifr_ifru) + sizeof(struct sockaddr_dn)) static char dn_rt_all_end_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x04,0x00,0x00}; static char dn_rt_all_rt_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x03,0x00,0x00}; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index a1917025e155..8c431e0f3627 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -767,11 +767,10 @@ static int dsa_switch_probe(struct dsa_switch *ds) struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n) { - size_t size = sizeof(struct dsa_switch) + n * sizeof(struct dsa_port); struct dsa_switch *ds; int i; - ds = devm_kzalloc(dev, size, GFP_KERNEL); + ds = devm_kzalloc(dev, struct_size(ds, ports, n), GFP_KERNEL); if (!ds) return NULL; diff --git a/net/dsa/master.c b/net/dsa/master.c index 79e97d2f2d9b..c58f33931be1 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -248,6 +248,8 @@ static void dsa_master_reset_mtu(struct net_device *dev) rtnl_unlock(); } +static struct lock_class_key dsa_master_addr_list_lock_key; + int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { int ret; @@ -261,6 +263,8 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) wmb(); dev->dsa_ptr = cpu_dp; + lockdep_set_class(&dev->addr_list_lock, + &dsa_master_addr_list_lock_key); ret = dsa_master_ethtool_setup(dev); if (ret) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 91de3a663226..2e5e7c04821b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -140,11 +140,14 @@ static int dsa_slave_close(struct net_device *dev) static void dsa_slave_change_rx_flags(struct net_device *dev, int change) { struct net_device *master = dsa_slave_to_master(dev); - - if (change & IFF_ALLMULTI) - dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); - if (change & IFF_PROMISC) - dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1); + if (dev->flags & IFF_UP) { + if (change & IFF_ALLMULTI) + dev_set_allmulti(master, + dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(master, + dev->flags & IFF_PROMISC ? 1 : -1); + } } static void dsa_slave_set_rx_mode(struct net_device *dev) @@ -362,18 +365,23 @@ static int dsa_slave_port_obj_del(struct net_device *dev, return err; } -static int dsa_slave_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr) +static int dsa_slave_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) { struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_switch *ds = dp->ds; struct dsa_switch_tree *dst = ds->dst; + ppid->id_len = sizeof(dst->index); + memcpy(&ppid->id, &dst->index, ppid->id_len); + + return 0; +} + +static int dsa_slave_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) +{ switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(dst->index); - memcpy(&attr->u.ppid.id, &dst->index, attr->u.ppid.id_len); - break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: attr->u.brport_flags_support = 0; break; @@ -639,7 +647,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev && !dp->pl) + if (!dev->phydev || !dp->pl) return -ENODEV; if (!ds->ops->set_mac_eee) @@ -659,7 +667,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev && !dp->pl) + if (!dev->phydev || !dp->pl) return -ENODEV; if (!ds->ops->get_mac_eee) @@ -1046,6 +1054,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_get_phys_port_name = dsa_slave_get_phys_port_name, .ndo_setup_tc = dsa_slave_setup_tc, .ndo_get_stats64 = dsa_slave_get_stats64, + .ndo_get_port_parent_id = dsa_slave_get_port_parent_id, }; static const struct switchdev_ops dsa_slave_switchdev_ops = { diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index da71b9e2af52..927e9c86f745 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -67,6 +67,8 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, pskb_trim_rcsum(skb, skb->len - len); + skb->offload_fwd_mark = true; + return skb; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 5022bc63863a..8e185b5a2bf6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1072,7 +1072,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, goto failure; } - fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); + fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL); if (!fi) goto failure; fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx, diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a40e48ded10d..b448cf32296c 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -159,7 +159,8 @@ static int unsolicited_report_interval(struct in_device *in_dev) return interval_jiffies; } -static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im); +static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im, + gfp_t gfp); static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im); static void igmpv3_clear_delrec(struct in_device *in_dev); static int sf_setstate(struct ip_mc_list *pmc); @@ -1145,7 +1146,8 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr) /* * deleted ip_mc_list manipulation */ -static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) +static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im, + gfp_t gfp) { struct ip_mc_list *pmc; struct net *net = dev_net(in_dev->dev); @@ -1156,7 +1158,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) * for deleted items allows change reports to use common code with * non-deleted or query-response MCA's. */ - pmc = kzalloc(sizeof(*pmc), GFP_KERNEL); + pmc = kzalloc(sizeof(*pmc), gfp); if (!pmc) return; spin_lock_init(&pmc->lock); @@ -1261,7 +1263,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) } #endif -static void igmp_group_dropped(struct ip_mc_list *im) +static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp) { struct in_device *in_dev = im->interface; #ifdef CONFIG_IP_MULTICAST @@ -1292,13 +1294,18 @@ static void igmp_group_dropped(struct ip_mc_list *im) return; } /* IGMPv3 */ - igmpv3_add_delrec(in_dev, im); + igmpv3_add_delrec(in_dev, im, gfp); igmp_ifc_event(in_dev); } #endif } +static void igmp_group_dropped(struct ip_mc_list *im) +{ + __igmp_group_dropped(im, GFP_KERNEL); +} + static void igmp_group_added(struct ip_mc_list *im) { struct in_device *in_dev = im->interface; @@ -1400,8 +1407,8 @@ static void ip_mc_hash_remove(struct in_device *in_dev, /* * A socket has joined a multicast group on device dev. */ -static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, - unsigned int mode) +static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr, + unsigned int mode, gfp_t gfp) { struct ip_mc_list *im; @@ -1415,7 +1422,7 @@ static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, } } - im = kzalloc(sizeof(*im), GFP_KERNEL); + im = kzalloc(sizeof(*im), gfp); if (!im) goto out; @@ -1448,6 +1455,12 @@ out: return; } +void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, gfp_t gfp) +{ + ____ip_mc_inc_group(in_dev, addr, MCAST_EXCLUDE, gfp); +} +EXPORT_SYMBOL(__ip_mc_inc_group); + void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) { __ip_mc_inc_group(in_dev, addr, MCAST_EXCLUDE); @@ -1634,7 +1647,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev) * A socket has left a multicast group on device dev */ -void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) +void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp) { struct ip_mc_list *i; struct ip_mc_list __rcu **ip; @@ -1649,7 +1662,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) ip_mc_hash_remove(in_dev, i); *ip = i->next_rcu; in_dev->mc_count--; - igmp_group_dropped(i); + __igmp_group_dropped(i, gfp); ip_mc_clear_src(i); if (!in_dev->dead) @@ -1662,7 +1675,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) } } } -EXPORT_SYMBOL(ip_mc_dec_group); +EXPORT_SYMBOL(__ip_mc_dec_group); /* Device changing type */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 1a4e9ff02762..5731670c560b 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -108,6 +108,7 @@ static size_t inet_sk_attr_size(struct sock *sk, + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) @@ -287,12 +288,19 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto errout; } - if (ext & (1 << (INET_DIAG_CLASS_ID - 1))) { + if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || + ext & (1 << (INET_DIAG_TCLASS - 1))) { u32 classid = 0; #ifdef CONFIG_SOCK_CGROUP_DATA classid = sock_cgroup_classid(&sk->sk_cgrp_data); #endif + /* Fallback to socket priority if class id isn't set. + * Classful qdiscs use it as direct reference to class. + * For cgroup2 classid is always zero. + */ + if (!classid) + classid = sk->sk_priority; if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) goto errout; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d757b9642d0d..be778599bfed 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -216,6 +216,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, atomic_set(&p->rid, 0); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; + p->n_redirects = 0; /* 60*HZ is arbitrary, but chosen enough high so that the first * calculation of tokens is at its maximum. */ diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d1cef66820d3..ccee9411dae1 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1381,12 +1381,17 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm *p = &t->parms; + __be16 o_flags = p->o_flags; + + if ((t->erspan_ver == 1 || t->erspan_ver == 2) && + !t->collect_md) + o_flags |= TUNNEL_KEY; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || nla_put_be16(skb, IFLA_GRE_OFLAGS, - gre_tnl_flags_to_gre_flags(p->o_flags)) || + gre_tnl_flags_to_gre_flags(o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index d7b43e700023..68a21bf75dd0 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -74,6 +74,33 @@ drop: return 0; } +static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) +{ + struct ip_tunnel *tunnel; + const struct iphdr *iph = ip_hdr(skb); + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn = net_generic(net, vti_net_id); + + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->saddr, iph->daddr, 0); + if (tunnel) { + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; + + skb->dev = tunnel->dev; + + return xfrm_input(skb, nexthdr, spi, encap_type); + } + + return -EINVAL; +drop: + kfree_skb(skb); + return 0; +} + static int vti_rcv(struct sk_buff *skb) { XFRM_SPI_SKB_CB(skb)->family = AF_INET; @@ -82,6 +109,14 @@ static int vti_rcv(struct sk_buff *skb) return vti_input(skb, ip_hdr(skb)->protocol, 0, 0); } +static int vti_rcv_ipip(struct sk_buff *skb) +{ + XFRM_SPI_SKB_CB(skb)->family = AF_INET; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + + return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0); +} + static int vti_rcv_cb(struct sk_buff *skb, int err) { unsigned short family; @@ -435,6 +470,12 @@ static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = { .priority = 100, }; +static struct xfrm_tunnel ipip_handler __read_mostly = { + .handler = vti_rcv_ipip, + .err_handler = vti4_err, + .priority = 0, +}; + static int __net_init vti_init_net(struct net *net) { int err; @@ -603,6 +644,13 @@ static int __init vti_init(void) if (err < 0) goto xfrm_proto_comp_failed; + msg = "ipip tunnel"; + err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + if (err < 0) { + pr_info("%s: cant't register tunnel\n",__func__); + goto xfrm_tunnel_failed; + } + msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); if (err < 0) @@ -612,6 +660,8 @@ static int __init vti_init(void) rtnl_link_failed: xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); +xfrm_tunnel_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); xfrm_proto_comp_failed: xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm_proto_ah_failed: diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index b9a9873c25c6..9bcca08efec9 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -85,7 +85,6 @@ /* Define the friendly delay before and after opening net devices */ #define CONF_POST_OPEN 10 /* After opening: 10 msecs */ -#define CONF_CARRIER_TIMEOUT 120000 /* Wait for carrier timeout */ /* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */ #define CONF_OPEN_RETRIES 2 /* (Re)open devices twice */ @@ -101,6 +100,9 @@ #define NONE cpu_to_be32(INADDR_NONE) #define ANY cpu_to_be32(INADDR_ANY) +/* Wait for carrier timeout default in seconds */ +static unsigned int carrier_timeout = 120; + /* * Public IP configuration */ @@ -268,9 +270,9 @@ static int __init ic_open_devs(void) /* wait for a carrier on at least one device */ start = jiffies; - next_msg = start + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12); + next_msg = start + msecs_to_jiffies(20000); while (time_before(jiffies, start + - msecs_to_jiffies(CONF_CARRIER_TIMEOUT))) { + msecs_to_jiffies(carrier_timeout * 1000))) { int wait, elapsed; for_each_netdev(&init_net, dev) @@ -283,9 +285,9 @@ static int __init ic_open_devs(void) continue; elapsed = jiffies_to_msecs(jiffies - start); - wait = (CONF_CARRIER_TIMEOUT - elapsed + 500)/1000; + wait = (carrier_timeout * 1000 - elapsed + 500) / 1000; pr_info("Waiting up to %d more seconds for network.\n", wait); - next_msg = jiffies + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12); + next_msg = jiffies + msecs_to_jiffies(20000); } have_carrier: rtnl_unlock(); @@ -1780,3 +1782,18 @@ static int __init vendor_class_identifier_setup(char *addrs) return 1; } __setup("dhcpclass=", vendor_class_identifier_setup); + +static int __init set_carrier_timeout(char *str) +{ + ssize_t ret; + + if (!str) + return 0; + + ret = kstrtouint(str, 0, &carrier_timeout); + if (ret) + return 0; + + return 1; +} +__setup("carrier_timeout=", set_carrier_timeout); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index fb99002c3d4e..e536970557dd 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -67,7 +67,6 @@ #include <net/fib_rules.h> #include <linux/netconf.h> #include <net/nexthop.h> -#include <net/switchdev.h> #include <linux/nospec.h> @@ -837,10 +836,8 @@ static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, static int vif_add(struct net *net, struct mr_table *mrt, struct vifctl *vifc, int mrtsock) { + struct netdev_phys_item_id ppid = { }; int vifi = vifc->vifc_vifi; - struct switchdev_attr attr = { - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - }; struct vif_device *v = &mrt->vif_table[vifi]; struct net_device *dev; struct in_device *in_dev; @@ -919,10 +916,10 @@ static int vif_add(struct net *net, struct mr_table *mrt, vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0), (VIFF_TUNNEL | VIFF_REGISTER)); - attr.orig_dev = dev; - if (!switchdev_port_attr_get(dev, &attr)) { - memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len); - v->dev_parent_id.id_len = attr.u.ppid.id_len; + err = dev_get_port_parent_id(dev, &ppid, true); + if (err == 0) { + memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len); + v->dev_parent_id.id_len = ppid.id_len; } else { v->dev_parent_id.id_len = 0; } diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 91b369bc44f9..835d50b279f5 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -846,9 +846,9 @@ static int clusterip_net_init(struct net *net) static void clusterip_net_exit(struct net *net) { +#ifdef CONFIG_PROC_FS struct clusterip_net *cn = clusterip_pernet(net); -#ifdef CONFIG_PROC_FS mutex_lock(&cn->mutex); proc_remove(cn->procdir); cn->procdir = NULL; diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index e26165af45cb..4b07eb8a9b18 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -215,6 +215,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, /* Change outer to look like the reply to an incoming packet */ nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); + target.dst.protonum = IPPROTO_ICMP; if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip)) return 0; diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c index a0aa13bcabda..0a8a60c1bf9a 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic_main.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic_main.c @@ -105,6 +105,8 @@ static void fast_csum(struct snmp_ctx *ctx, unsigned char offset) int snmp_version(void *context, size_t hdrlen, unsigned char tag, const void *data, size_t datalen) { + if (datalen != 1) + return -EINVAL; if (*(unsigned char *)data > 1) return -ENOTSUPP; return 1; @@ -114,8 +116,11 @@ int snmp_helper(void *context, size_t hdrlen, unsigned char tag, const void *data, size_t datalen) { struct snmp_ctx *ctx = (struct snmp_ctx *)context; - __be32 *pdata = (__be32 *)data; + __be32 *pdata; + if (datalen != 4) + return -EINVAL; + pdata = (__be32 *)data; if (*pdata == ctx->from) { pr_debug("%s: %pI4 to %pI4\n", __func__, (void *)&ctx->from, (void *)&ctx->to); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 99be68b15da0..ecc12a768191 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -887,13 +887,15 @@ void ip_rt_send_redirect(struct sk_buff *skb) /* No redirected packets during ip_rt_redirect_silence; * reset the algorithm. */ - if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) + if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) { peer->rate_tokens = 0; + peer->n_redirects = 0; + } /* Too many ignored redirects; do not send anything * set dst.rate_last to the last seen redirected packet. */ - if (peer->rate_tokens >= ip_rt_redirect_number) { + if (peer->n_redirects >= ip_rt_redirect_number) { peer->rate_last = jiffies; goto out_put_peer; } @@ -910,6 +912,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->rate_tokens; + ++peer->n_redirects; #ifdef CONFIG_IP_ROUTE_VERBOSE if (log_martians && peer->rate_tokens == ip_rt_redirect_number) @@ -1608,7 +1611,8 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, return -EINVAL; if (ipv4_is_zeronet(saddr)) { - if (!ipv4_is_local_multicast(daddr)) + if (!ipv4_is_local_multicast(daddr) && + ip_hdr(skb)->protocol != IPPROTO_IGMP) return -EINVAL; } else { err = fib_validate_source(skb, saddr, 0, tos, 0, dev, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6f8d292ad501..cab6b2f2f61d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1844,57 +1844,78 @@ out: #endif static void tcp_update_recv_tstamps(struct sk_buff *skb, - struct scm_timestamping *tss) + struct scm_timestamping_internal *tss) { if (skb->tstamp) - tss->ts[0] = ktime_to_timespec(skb->tstamp); + tss->ts[0] = ktime_to_timespec64(skb->tstamp); else - tss->ts[0] = (struct timespec) {0}; + tss->ts[0] = (struct timespec64) {0}; if (skb_hwtstamps(skb)->hwtstamp) - tss->ts[2] = ktime_to_timespec(skb_hwtstamps(skb)->hwtstamp); + tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp); else - tss->ts[2] = (struct timespec) {0}; + tss->ts[2] = (struct timespec64) {0}; } /* Similar to __sock_recv_timestamp, but does not require an skb */ static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, - struct scm_timestamping *tss) + struct scm_timestamping_internal *tss) { - struct timeval tv; + int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); bool has_timestamping = false; if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) { if (sock_flag(sk, SOCK_RCVTSTAMP)) { if (sock_flag(sk, SOCK_RCVTSTAMPNS)) { - put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, - sizeof(tss->ts[0]), &tss->ts[0]); - } else { - tv.tv_sec = tss->ts[0].tv_sec; - tv.tv_usec = tss->ts[0].tv_nsec / 1000; + if (new_tstamp) { + struct __kernel_timespec kts = {tss->ts[0].tv_sec, tss->ts[0].tv_nsec}; + + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW, + sizeof(kts), &kts); + } else { + struct timespec ts_old = timespec64_to_timespec(tss->ts[0]); - put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, - sizeof(tv), &tv); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD, + sizeof(ts_old), &ts_old); + } + } else { + if (new_tstamp) { + struct __kernel_sock_timeval stv; + + stv.tv_sec = tss->ts[0].tv_sec; + stv.tv_usec = tss->ts[0].tv_nsec / 1000; + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW, + sizeof(stv), &stv); + } else { + struct __kernel_old_timeval tv; + + tv.tv_sec = tss->ts[0].tv_sec; + tv.tv_usec = tss->ts[0].tv_nsec / 1000; + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, + sizeof(tv), &tv); + } } } if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) has_timestamping = true; else - tss->ts[0] = (struct timespec) {0}; + tss->ts[0] = (struct timespec64) {0}; } if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) { if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) has_timestamping = true; else - tss->ts[2] = (struct timespec) {0}; + tss->ts[2] = (struct timespec64) {0}; } if (has_timestamping) { - tss->ts[1] = (struct timespec) {0}; - put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING, - sizeof(*tss), tss); + tss->ts[1] = (struct timespec64) {0}; + if (sock_flag(sk, SOCK_TSTAMP_NEW)) + put_cmsg_scm_timestamping64(msg, tss); + else + put_cmsg_scm_timestamping(msg, tss); } } @@ -1935,7 +1956,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, long timeo; struct sk_buff *skb, *last; u32 urg_hole = 0; - struct scm_timestamping tss; + struct scm_timestamping_internal tss; bool has_tss = false; bool has_cmsg; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index dcb1d434f7da..da5a21050ba9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1200,7 +1200,8 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires) list_for_each_entry(ifa, &idev->addr_list, if_list) { if (ifa == ifp) continue; - if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr, + if (ifa->prefix_len != ifp->prefix_len || + !ipv6_prefix_equal(&ifa->addr, &ifp->addr, ifp->prefix_len)) continue; if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE)) diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 5cd0029d930e..6c79af056d9b 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -134,6 +134,11 @@ static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1, return -EAFNOSUPPORT; } +static int eafnosupport_ipv6_route_input(struct sk_buff *skb) +{ + return -EAFNOSUPPORT; +} + static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id) { return NULL; @@ -170,6 +175,7 @@ eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { .ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup, + .ipv6_route_input = eafnosupport_ipv6_route_input, .fib6_get_table = eafnosupport_fib6_get_table, .fib6_table_lookup = eafnosupport_fib6_table_lookup, .fib6_lookup = eafnosupport_fib6_lookup, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d99753b5e39b..2f45d2a3e3a3 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -900,10 +900,17 @@ static struct pernet_operations inet6_net_ops = { .exit = inet6_net_exit, }; +static int ipv6_route_input(struct sk_buff *skb) +{ + ip6_route_input(skb); + return skb_dst(skb)->error; +} + static const struct ipv6_stub ipv6_stub_impl = { .ipv6_sock_mc_join = ipv6_sock_mc_join, .ipv6_sock_mc_drop = ipv6_sock_mc_drop, .ipv6_dst_lookup = ip6_dst_lookup, + .ipv6_route_input = ipv6_route_input, .fib6_get_table = fib6_get_table, .fib6_table_lookup = fib6_table_lookup, .fib6_lookup = fib6_lookup, diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index e081e69d534e..65a4f96dc462 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -2098,12 +2098,17 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm *p = &t->parms; + __be16 o_flags = p->o_flags; + + if ((p->erspan_ver == 1 || p->erspan_ver == 2) && + !p->collect_md) + o_flags |= TUNNEL_KEY; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || nla_put_be16(skb, IFLA_GRE_OFLAGS, - gre_tnl_flags_to_gre_flags(p->o_flags)) || + gre_tnl_flags_to_gre_flags(o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 30337b38274b..cc01aa3f2b5e 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1516,6 +1516,9 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all) continue; rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); list_del_rcu(&c->list); + call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), + FIB_EVENT_ENTRY_DEL, + (struct mfc6_cache *)c, mrt->id); mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); mr_cache_put(c); } @@ -1524,10 +1527,6 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all) spin_lock_bh(&mfc_unres_lock); list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { list_del(&c->list); - call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), - FIB_EVENT_ENTRY_DEL, - (struct mfc6_cache *)c, - mrt->id); mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index a8263031f3a6..1240ccd57f39 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -23,9 +23,11 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) struct sock *sk = sk_to_full_sk(skb->sk); unsigned int hh_len; struct dst_entry *dst; + int strict = (ipv6_addr_type(&iph->daddr) & + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); struct flowi6 fl6 = { .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if : - rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0, + strict ? skb_dst(skb)->dev->ifindex : 0, .flowi6_mark = skb->mark, .flowi6_uid = sock_net_uid(net, sk), .daddr = iph->daddr, diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index b52026adb3e7..490bfd3c9162 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -227,6 +227,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, } nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple); + target.dst.protonum = IPPROTO_ICMPV6; if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip)) return 0; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dc066fdf7e46..87a0561136dd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2277,14 +2277,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) { - bool from_set; - - rcu_read_lock(); - from_set = !!rcu_dereference(rt->from); - rcu_read_unlock(); - return !(rt->rt6i_flags & RTF_CACHE) && - (rt->rt6i_flags & RTF_PCPU || from_set); + (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from)); } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 8d0ba757a46c..9b2f272ca164 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -221,9 +221,7 @@ static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); genlmsg_end(msg, hdr); - genlmsg_reply(msg, info); - - return 0; + return genlmsg_reply(msg, info); nla_put_failure: rcu_read_unlock(); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 8181ee7e1e27..ee5403cbe655 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -146,6 +146,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) } else { ip6_flow_hdr(hdr, 0, flowlabel); hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); + + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); } hdr->nexthdr = NEXTHDR_ROUTING; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1e03305c0549..e8a1dabef803 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -546,7 +546,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info) } err = 0; - if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) + if (__in6_dev_get(skb->dev) && + !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) goto out; if (t->parms.iph.daddr == 0) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 26f1d435696a..fed6becc5daf 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -83,8 +83,7 @@ #define L2TP_SLFLAG_S 0x40000000 #define L2TP_SL_SEQ_MASK 0x00ffffff -#define L2TP_HDR_SIZE_SEQ 10 -#define L2TP_HDR_SIZE_NOSEQ 6 +#define L2TP_HDR_SIZE_MAX 14 /* Default trace flags */ #define L2TP_DEFAULT_DEBUG_FLAGS 0 @@ -808,7 +807,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) __skb_pull(skb, sizeof(struct udphdr)); /* Short packet? */ - if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) { + if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) { l2tp_info(tunnel, L2TP_MSG_DATA, "%s: recv short packet (len=%d)\n", tunnel->name, skb->len); @@ -884,6 +883,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) goto error; } + if (tunnel->version == L2TP_HDR_VER_3 && + l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto error; + l2tp_recv_common(session, skb, ptr, optr, hdrflags, length); l2tp_session_dec_refcount(session); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 9c9afe94d389..b2ce90260c35 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -301,6 +301,26 @@ static inline bool l2tp_tunnel_uses_xfrm(const struct l2tp_tunnel *tunnel) } #endif +static inline int l2tp_v3_ensure_opt_in_linear(struct l2tp_session *session, struct sk_buff *skb, + unsigned char **ptr, unsigned char **optr) +{ + int opt_len = session->peer_cookie_len + l2tp_get_l2specific_len(session); + + if (opt_len > 0) { + int off = *ptr - *optr; + + if (!pskb_may_pull(skb, off + opt_len)) + return -1; + + if (skb->data != *optr) { + *optr = skb->data; + *ptr = skb->data + off; + } + } + + return 0; +} + #define l2tp_printk(ptr, type, func, fmt, ...) \ do { \ if (((ptr)->debug) & (type)) \ diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 35f6f86d4dcc..d4c60523c549 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -165,6 +165,9 @@ static int l2tp_ip_recv(struct sk_buff *skb) print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } + if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto discard_sess; + l2tp_recv_common(session, skb, ptr, optr, 0, skb->len); l2tp_session_dec_refcount(session); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 237f1a4a0b0c..0ae6899edac0 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -178,6 +178,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb) print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } + if (l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + goto discard_sess; + l2tp_recv_common(session, skb, ptr, optr, 0, skb->len); l2tp_session_dec_refcount(session); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 69e831bc317b..2c4cd4183bf9 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -8,7 +8,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -229,7 +229,7 @@ ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable) clear_bit(IEEE80211_TXQ_STOP, &txqi->flags); local_bh_disable(); rcu_read_lock(); - drv_wake_tx_queue(sta->sdata->local, txqi); + schedule_and_wake_txq(sta->sdata->local, txqi); rcu_read_unlock(); local_bh_enable(); } @@ -366,6 +366,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state); + ieee80211_agg_stop_txq(sta, tid); + spin_unlock_bh(&sta->lock); ht_dbg(sta->sdata, "Tx BA session stop requested for %pM tid %u\n", diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 2493c74c2d37..d65aa019ce85 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1231,6 +1231,11 @@ static void sta_apply_mesh_params(struct ieee80211_local *local, ieee80211_mps_sta_status_update(sta); changed |= ieee80211_mps_set_sta_local_pm(sta, sdata->u.mesh.mshcfg.power_mode); + + ewma_mesh_tx_rate_avg_init(&sta->mesh->tx_rate_avg); + /* init at low value */ + ewma_mesh_tx_rate_avg_add(&sta->mesh->tx_rate_avg, 10); + break; case NL80211_PLINK_LISTEN: case NL80211_PLINK_BLOCKED: @@ -1447,6 +1452,9 @@ static int sta_apply_parameters(struct ieee80211_local *local, if (ieee80211_vif_is_mesh(&sdata->vif)) sta_apply_mesh_params(local, sta, params); + if (params->airtime_weight) + sta->airtime_weight = params->airtime_weight; + /* set the STA state after all sta info from usermode has been set */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) || set & BIT(NL80211_STA_FLAG_ASSOCIATED)) { @@ -1746,7 +1754,9 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, MPATH_INFO_EXPTIME | MPATH_INFO_DISCOVERY_TIMEOUT | MPATH_INFO_DISCOVERY_RETRIES | - MPATH_INFO_FLAGS; + MPATH_INFO_FLAGS | + MPATH_INFO_HOP_COUNT | + MPATH_INFO_PATH_CHANGE; pinfo->frame_qlen = mpath->frame_queue.qlen; pinfo->sn = mpath->sn; @@ -1766,6 +1776,8 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, pinfo->flags |= NL80211_MPATH_FLAG_FIXED; if (mpath->flags & MESH_PATH_RESOLVED) pinfo->flags |= NL80211_MPATH_FLAG_RESOLVED; + pinfo->hop_count = mpath->hop_count; + pinfo->path_change_count = mpath->path_change_count; } static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 3fe541e358f3..343ad0a915e4 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -218,6 +218,7 @@ static const char *hw_flag_names[] = { FLAG(BUFF_MMPDU_TXQ), FLAG(SUPPORTS_VHT_EXT_NSS_BW), FLAG(STA_MMPDU_TXQ), + FLAG(TX_STATUS_NO_AMPDU_LEN), #undef FLAG }; @@ -383,6 +384,9 @@ void debugfs_hw_add(struct ieee80211_local *local) if (local->ops->wake_tx_queue) DEBUGFS_ADD_MODE(aqm, 0600); + debugfs_create_u16("airtime_flags", 0600, + phyd, &local->airtime_flags); + statsd = debugfs_create_dir("statistics", phyd); /* if the dir failed, don't put all the other things into the root! */ diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index b753194710ad..3aa618dcc58e 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -181,9 +181,9 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf, txqi->tin.tx_bytes, txqi->tin.tx_packets, txqi->flags, - txqi->flags & (1<<IEEE80211_TXQ_STOP) ? "STOP" : "RUN", - txqi->flags & (1<<IEEE80211_TXQ_AMPDU) ? " AMPDU" : "", - txqi->flags & (1<<IEEE80211_TXQ_NO_AMSDU) ? " NO-AMSDU" : ""); + test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ? "STOP" : "RUN", + test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags) ? " AMPDU" : "", + test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags) ? " NO-AMSDU" : ""); } rcu_read_unlock(); @@ -195,6 +195,64 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf, } STA_OPS(aqm); +static ssize_t sta_airtime_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct ieee80211_local *local = sta->sdata->local; + size_t bufsz = 200; + char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf; + u64 rx_airtime = 0, tx_airtime = 0; + s64 deficit[IEEE80211_NUM_ACS]; + ssize_t rv; + int ac; + + if (!buf) + return -ENOMEM; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + spin_lock_bh(&local->active_txq_lock[ac]); + rx_airtime += sta->airtime[ac].rx_airtime; + tx_airtime += sta->airtime[ac].tx_airtime; + deficit[ac] = sta->airtime[ac].deficit; + spin_unlock_bh(&local->active_txq_lock[ac]); + } + + p += scnprintf(p, bufsz + buf - p, + "RX: %llu us\nTX: %llu us\nWeight: %u\n" + "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n", + rx_airtime, + tx_airtime, + sta->airtime_weight, + deficit[0], + deficit[1], + deficit[2], + deficit[3]); + + rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); + kfree(buf); + return rv; +} + +static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct ieee80211_local *local = sta->sdata->local; + int ac; + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + spin_lock_bh(&local->active_txq_lock[ac]); + sta->airtime[ac].rx_airtime = 0; + sta->airtime[ac].tx_airtime = 0; + sta->airtime[ac].deficit = sta->airtime_weight; + spin_unlock_bh(&local->active_txq_lock[ac]); + } + + return count; +} +STA_OPS_RW(airtime); + static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { @@ -906,6 +964,10 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) if (local->ops->wake_tx_queue) DEBUGFS_ADD(aqm); + if (wiphy_ext_feature_isset(local->hw.wiphy, + NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) + DEBUGFS_ADD(airtime); + if (sizeof(sta->driver_buffered_tids) == sizeof(u32)) debugfs_create_x32("driver_buffered_tids", 0400, sta->debugfs_dir, diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 3e0d5922a440..ba3c07b10cd0 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1173,6 +1173,13 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, local->ops->wake_tx_queue(&local->hw, &txq->txq); } +static inline void schedule_and_wake_txq(struct ieee80211_local *local, + struct txq_info *txqi) +{ + ieee80211_schedule_txq(&local->hw, &txqi->txq); + drv_wake_tx_queue(local, txqi); +} + static inline int drv_can_aggregate_in_amsdu(struct ieee80211_local *local, struct sk_buff *head, struct sk_buff *skb) diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index f849ea814993..e03c46ac8e4d 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -107,6 +107,14 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, __check_htcap_enable(ht_capa, ht_capa_mask, ht_cap, IEEE80211_HT_CAP_40MHZ_INTOLERANT); + /* Allow user to enable TX STBC bit */ + __check_htcap_enable(ht_capa, ht_capa_mask, ht_cap, + IEEE80211_HT_CAP_TX_STBC); + + /* Allow user to configure RX STBC bits */ + if (ht_capa_mask->cap_info & IEEE80211_HT_CAP_RX_STBC) + ht_cap->cap |= ht_capa->cap_info & IEEE80211_HT_CAP_RX_STBC; + /* Allow user to decrease AMPDU factor */ if (ht_capa_mask->ampdu_params_info & IEEE80211_HT_AMPDU_PARM_FACTOR) { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 7dfb4e2f98b2..056b16bce3b0 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -831,6 +831,8 @@ enum txq_info_flags { * a fq_flow which is already owned by a different tin * @def_cvars: codel vars for @def_flow * @frags: used to keep fragments created after dequeue + * @schedule_order: used with ieee80211_local->active_txqs + * @schedule_round: counter to prevent infinite loops on TXQ scheduling */ struct txq_info { struct fq_tin tin; @@ -838,6 +840,8 @@ struct txq_info { struct codel_vars def_cvars; struct codel_stats cstats; struct sk_buff_head frags; + struct list_head schedule_order; + u16 schedule_round; unsigned long flags; /* keep last! */ @@ -1129,6 +1133,13 @@ struct ieee80211_local { struct codel_vars *cvars; struct codel_params cparams; + /* protects active_txqs and txqi->schedule_order */ + spinlock_t active_txq_lock[IEEE80211_NUM_ACS]; + struct list_head active_txqs[IEEE80211_NUM_ACS]; + u16 schedule_round[IEEE80211_NUM_ACS]; + + u16 airtime_flags; + const struct ieee80211_ops *ops; /* diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 87a729926734..71005b6dfcd1 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -478,6 +478,8 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { IEEE80211_HT_CAP_MAX_AMSDU | IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_TX_STBC | + IEEE80211_HT_CAP_RX_STBC | IEEE80211_HT_CAP_LDPC_CODING | IEEE80211_HT_CAP_40MHZ_INTOLERANT), .mcs = { @@ -663,6 +665,12 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, spin_lock_init(&local->rx_path_lock); spin_lock_init(&local->queue_stop_reason_lock); + for (i = 0; i < IEEE80211_NUM_ACS; i++) { + INIT_LIST_HEAD(&local->active_txqs[i]); + spin_lock_init(&local->active_txq_lock[i]); + } + local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX; + INIT_LIST_HEAD(&local->chanctx_list); mutex_init(&local->chanctx_mtx); @@ -1148,6 +1156,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (!local->hw.max_nan_de_entries) local->hw.max_nan_de_entries = IEEE80211_MAX_NAN_INSTANCE_ID; + if (!local->hw.weight_multiplier) + local->hw.weight_multiplier = 1; + result = ieee80211_wep_init(local); if (result < 0) wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n", diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index cad6592c52a1..8b26858ab4d5 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -94,6 +94,7 @@ enum mesh_deferred_task_flags { * @last_preq_to_root: Timestamp of last PREQ sent to root * @is_root: the destination station of this path is a root node * @is_gate: the destination station of this path is a mesh gate + * @path_change_count: the number of path changes to destination * * * The dst address is unique in the mesh path table. Since the mesh_path is @@ -124,6 +125,7 @@ struct mesh_path { unsigned long last_preq_to_root; bool is_root; bool is_gate; + u32 path_change_count; }; /** diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 6950cd0bf594..e00284afdda5 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -300,6 +300,7 @@ void ieee80211s_update_metric(struct ieee80211_local *local, { struct ieee80211_tx_info *txinfo = st->info; int failed; + struct rate_info rinfo; failed = !(txinfo->flags & IEEE80211_TX_STAT_ACK); @@ -310,12 +311,15 @@ void ieee80211s_update_metric(struct ieee80211_local *local, if (ewma_mesh_fail_avg_read(&sta->mesh->fail_avg) > LINK_FAIL_THRESH) mesh_plink_broken(sta); + + sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo); + ewma_mesh_tx_rate_avg_add(&sta->mesh->tx_rate_avg, + cfg80211_calculate_bitrate(&rinfo)); } static u32 airtime_link_metric_get(struct ieee80211_local *local, struct sta_info *sta) { - struct rate_info rinfo; /* This should be adjusted for each device */ int device_constant = 1 << ARITH_SHIFT; int test_frame_len = TEST_FRAME_LEN << ARITH_SHIFT; @@ -339,8 +343,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, if (fail_avg > LINK_FAIL_THRESH) return MAX_METRIC; - sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo); - rate = cfg80211_calculate_bitrate(&rinfo); + rate = ewma_mesh_tx_rate_avg_read(&sta->mesh->tx_rate_avg); if (WARN_ON(!rate)) return MAX_METRIC; @@ -386,6 +389,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, unsigned long orig_lifetime, exp_time; u32 last_hop_metric, new_metric; bool process = true; + u8 hopcount; rcu_read_lock(); sta = sta_info_get(sdata, mgmt->sa); @@ -404,6 +408,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, orig_sn = PREQ_IE_ORIG_SN(hwmp_ie); orig_lifetime = PREQ_IE_LIFETIME(hwmp_ie); orig_metric = PREQ_IE_METRIC(hwmp_ie); + hopcount = PREQ_IE_HOPCOUNT(hwmp_ie) + 1; break; case MPATH_PREP: /* Originator here refers to the MP that was the target in the @@ -415,6 +420,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, orig_sn = PREP_IE_TARGET_SN(hwmp_ie); orig_lifetime = PREP_IE_LIFETIME(hwmp_ie); orig_metric = PREP_IE_METRIC(hwmp_ie); + hopcount = PREP_IE_HOPCOUNT(hwmp_ie) + 1; break; default: rcu_read_unlock(); @@ -441,7 +447,10 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, (mpath->flags & MESH_PATH_SN_VALID)) { if (SN_GT(mpath->sn, orig_sn) || (mpath->sn == orig_sn && - new_metric >= mpath->metric)) { + (rcu_access_pointer(mpath->next_hop) != + sta ? + mult_frac(new_metric, 10, 9) : + new_metric) >= mpath->metric)) { process = false; fresh_info = false; } @@ -476,12 +485,15 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, } if (fresh_info) { + if (rcu_access_pointer(mpath->next_hop) != sta) + mpath->path_change_count++; mesh_path_assign_nexthop(mpath, sta); mpath->flags |= MESH_PATH_SN_VALID; mpath->metric = new_metric; mpath->sn = orig_sn; mpath->exp_time = time_after(mpath->exp_time, exp_time) ? mpath->exp_time : exp_time; + mpath->hop_count = hopcount; mesh_path_activate(mpath); spin_unlock_bh(&mpath->state_lock); ewma_mesh_fail_avg_init(&sta->mesh->fail_avg); @@ -506,8 +518,10 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, if (mpath) { spin_lock_bh(&mpath->state_lock); if ((mpath->flags & MESH_PATH_FIXED) || - ((mpath->flags & MESH_PATH_ACTIVE) && - (last_hop_metric > mpath->metric))) + ((mpath->flags & MESH_PATH_ACTIVE) && + ((rcu_access_pointer(mpath->next_hop) != sta ? + mult_frac(last_hop_metric, 10, 9) : + last_hop_metric) > mpath->metric))) fresh_info = false; } else { mpath = mesh_path_add(sdata, ta); @@ -519,10 +533,13 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, } if (fresh_info) { + if (rcu_access_pointer(mpath->next_hop) != sta) + mpath->path_change_count++; mesh_path_assign_nexthop(mpath, sta); mpath->metric = last_hop_metric; mpath->exp_time = time_after(mpath->exp_time, exp_time) ? mpath->exp_time : exp_time; + mpath->hop_count = 1; mesh_path_activate(mpath); spin_unlock_bh(&mpath->state_lock); ewma_mesh_fail_avg_init(&sta->mesh->fail_avg); diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index f466ec37d161..ccaf951e4e31 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -294,6 +294,15 @@ minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index) return &mi->groups[index / MCS_GROUP_RATES].rates[index % MCS_GROUP_RATES]; } +static unsigned int +minstrel_ht_avg_ampdu_len(struct minstrel_ht_sta *mi) +{ + if (!mi->avg_ampdu_len) + return AVG_AMPDU_SIZE; + + return MINSTREL_TRUNC(mi->avg_ampdu_len); +} + /* * Return current throughput based on the average A-MPDU length, taking into * account the expected number of retransmissions and their expected length @@ -309,7 +318,7 @@ minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, return 0; if (group != MINSTREL_CCK_GROUP) - nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); + nsecs = 1000 * mi->overhead / minstrel_ht_avg_ampdu_len(mi); nsecs += minstrel_mcs_groups[group].duration[rate] << minstrel_mcs_groups[group].shift; @@ -503,8 +512,12 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) u16 tmp_cck_tp_rate[MAX_THR_RATES], index; if (mi->ampdu_packets > 0) { - mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len, - MINSTREL_FRAC(mi->ampdu_len, mi->ampdu_packets), EWMA_LEVEL); + if (!ieee80211_hw_check(mp->hw, TX_STATUS_NO_AMPDU_LEN)) + mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len, + MINSTREL_FRAC(mi->ampdu_len, mi->ampdu_packets), + EWMA_LEVEL); + else + mi->avg_ampdu_len = 0; mi->ampdu_len = 0; mi->ampdu_packets = 0; } @@ -709,7 +722,9 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, mi->ampdu_len += info->status.ampdu_len; if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) { - mi->sample_wait = 16 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len); + int avg_ampdu_len = minstrel_ht_avg_ampdu_len(mi); + + mi->sample_wait = 16 + 2 * avg_ampdu_len; mi->sample_tries = 1; mi->sample_count--; } @@ -777,7 +792,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, unsigned int cw = mp->cw_min; unsigned int ctime = 0; unsigned int t_slot = 9; /* FIXME */ - unsigned int ampdu_len = MINSTREL_TRUNC(mi->avg_ampdu_len); + unsigned int ampdu_len = minstrel_ht_avg_ampdu_len(mi); unsigned int overhead = 0, overhead_rtscts = 0; mrs = minstrel_get_ratestats(mi, index); diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index 57820a5f2c16..31641d0b0f5c 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -160,9 +160,10 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) "lookaround %d\n", max(0, (int) mi->total_packets - (int) mi->sample_packets), mi->sample_packets); - p += sprintf(p, "Average # of aggregated frames per A-MPDU: %d.%d\n", - MINSTREL_TRUNC(mi->avg_ampdu_len), - MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10); + if (mi->avg_ampdu_len) + p += sprintf(p, "Average # of aggregated frames per A-MPDU: %d.%d\n", + MINSTREL_TRUNC(mi->avg_ampdu_len), + MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10); ms->len = p - ms->buf; WARN_ON(ms->len + sizeof(*ms) > 32768); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index c4a8f115ed33..11f058987a54 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -90,7 +90,6 @@ static void __cleanup_single_sta(struct sta_info *sta) struct tid_ampdu_tx *tid_tx; struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; - struct fq *fq = &local->fq; struct ps_data *ps; if (test_sta_flag(sta, WLAN_STA_PS_STA) || @@ -120,9 +119,7 @@ static void __cleanup_single_sta(struct sta_info *sta) txqi = to_txq_info(sta->sta.txq[i]); - spin_lock_bh(&fq->lock); ieee80211_txq_purge(local, txqi); - spin_unlock_bh(&fq->lock); } } @@ -387,9 +384,12 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, if (sta_prepare_rate_control(local, sta, gfp)) goto free_txq; + sta->airtime_weight = IEEE80211_DEFAULT_AIRTIME_WEIGHT; + for (i = 0; i < IEEE80211_NUM_ACS; i++) { skb_queue_head_init(&sta->ps_tx_buf[i]); skb_queue_head_init(&sta->tx_filtered[i]); + sta->airtime[i].deficit = sta->airtime_weight; } for (i = 0; i < IEEE80211_NUM_TIDS; i++) @@ -1249,7 +1249,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) if (!sta->sta.txq[i] || !txq_has_queue(sta->sta.txq[i])) continue; - drv_wake_tx_queue(local, to_txq_info(sta->sta.txq[i])); + schedule_and_wake_txq(local, to_txq_info(sta->sta.txq[i])); } skb_queue_head_init(&pending); @@ -1826,6 +1826,27 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, } EXPORT_SYMBOL(ieee80211_sta_set_buffered); +void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid, + u32 tx_airtime, u32 rx_airtime) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + struct ieee80211_local *local = sta->sdata->local; + u8 ac = ieee80211_ac_from_tid(tid); + u32 airtime = 0; + + if (sta->local->airtime_flags & AIRTIME_USE_TX) + airtime += tx_airtime; + if (sta->local->airtime_flags & AIRTIME_USE_RX) + airtime += rx_airtime; + + spin_lock_bh(&local->active_txq_lock[ac]); + sta->airtime[ac].tx_airtime += tx_airtime; + sta->airtime[ac].rx_airtime += rx_airtime; + sta->airtime[ac].deficit -= airtime; + spin_unlock_bh(&local->active_txq_lock[ac]); +} +EXPORT_SYMBOL(ieee80211_sta_register_airtime); + int sta_info_move_state(struct sta_info *sta, enum ieee80211_sta_state new_state) { @@ -2188,6 +2209,23 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED); } + if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_DURATION))) { + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + sinfo->rx_duration += sta->airtime[ac].rx_airtime; + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_DURATION); + } + + if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_DURATION))) { + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + sinfo->tx_duration += sta->airtime[ac].tx_airtime; + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_DURATION); + } + + if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT))) { + sinfo->airtime_weight = sta->airtime_weight; + sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT); + } + sinfo->rx_dropped_misc = sta->rx_stats.dropped; if (sta->pcpu_rx_stats) { for_each_possible_cpu(cpu) { diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 8eb29041be54..71f7e4973329 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -127,6 +127,16 @@ enum ieee80211_agg_stop_reason { AGG_STOP_DESTROY_STA, }; +/* Debugfs flags to enable/disable use of RX/TX airtime in scheduler */ +#define AIRTIME_USE_TX BIT(0) +#define AIRTIME_USE_RX BIT(1) + +struct airtime_info { + u64 rx_airtime; + u64 tx_airtime; + s64 deficit; +}; + struct sta_info; /** @@ -343,6 +353,7 @@ struct ieee80211_fast_rx { /* we use only values in the range 0-100, so pick a large precision */ DECLARE_EWMA(mesh_fail_avg, 20, 8) +DECLARE_EWMA(mesh_tx_rate_avg, 8, 16) /** * struct mesh_sta - mesh STA information @@ -366,6 +377,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8) * processed * @connected_to_gate: true if mesh STA has a path to a mesh gate * @fail_avg: moving percentage of failed MSDUs + * @tx_rate_avg: moving average of tx bitrate */ struct mesh_sta { struct timer_list plink_timer; @@ -394,6 +406,8 @@ struct mesh_sta { /* moving percentage of failed MSDUs */ struct ewma_mesh_fail_avg fail_avg; + /* moving average of tx bitrate */ + struct ewma_mesh_tx_rate_avg tx_rate_avg; }; DECLARE_EWMA(signal, 10, 8) @@ -459,6 +473,9 @@ struct ieee80211_sta_rx_stats { * @last_seq_ctrl: last received seq/frag number from this STA (per TID * plus one for non-QoS frames) * @tid_seq: per-TID sequence numbers for sending to this STA + * @airtime: per-AC struct airtime_info describing airtime statistics for this + * station + * @airtime_weight: station weight for airtime fairness calculation purposes * @ampdu_mlme: A-MPDU state machine state * @mesh: mesh STA information * @debugfs_dir: debug filesystem directory dentry @@ -480,10 +497,28 @@ struct ieee80211_sta_rx_stats { * @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to * the BSS one. * @tx_stats: TX statistics + * @tx_stats.packets: # of packets transmitted + * @tx_stats.bytes: # of bytes in all packets transmitted + * @tx_stats.last_rate: last TX rate + * @tx_stats.msdu: # of transmitted MSDUs per TID * @rx_stats: RX statistics + * @rx_stats_avg: averaged RX statistics + * @rx_stats_avg.signal: averaged signal + * @rx_stats_avg.chain_signal: averaged per-chain signal * @pcpu_rx_stats: per-CPU RX statistics, assigned only if the driver needs * this (by advertising the USES_RSS hw flag) * @status_stats: TX status statistics + * @status_stats.filtered: # of filtered frames + * @status_stats.retry_failed: # of frames that failed after retry + * @status_stats.retry_count: # of retries attempted + * @status_stats.lost_packets: # of lost packets + * @status_stats.last_tdls_pkt_time: timestamp of last TDLS packet + * @status_stats.msdu_retries: # of MSDU retries + * @status_stats.msdu_failed: # of failed MSDUs + * @status_stats.last_ack: last ack timestamp (jiffies) + * @status_stats.last_ack_signal: last ACK signal + * @status_stats.ack_signal_filled: last ACK signal validity + * @status_stats.avg_ack_signal: average ACK signal */ struct sta_info { /* General information, mostly static */ @@ -565,6 +600,9 @@ struct sta_info { } tx_stats; u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; + struct airtime_info airtime[IEEE80211_NUM_ACS]; + u16 airtime_weight; + /* * Aggregation information, locked with lock. */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 3f0b96e1e02f..5b9952b1caf3 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -823,6 +823,12 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, ieee80211_sta_tx_notify(sta->sdata, (void *) skb->data, acked, info->status.tx_time); + if (info->status.tx_time && + wiphy_ext_feature_isset(local->hw.wiphy, + NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) + ieee80211_sta_register_airtime(&sta->sta, tid, + info->status.tx_time, 0); + if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { if (info->flags & IEEE80211_TX_STAT_ACK) { if (sta->status_stats.lost_packets) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f170d6c6629a..8a49a74c0a37 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1449,6 +1449,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, codel_vars_init(&txqi->def_cvars); codel_stats_init(&txqi->cstats); __skb_queue_head_init(&txqi->frags); + INIT_LIST_HEAD(&txqi->schedule_order); txqi->txq.vif = &sdata->vif; @@ -1487,8 +1488,14 @@ void ieee80211_txq_purge(struct ieee80211_local *local, struct fq *fq = &local->fq; struct fq_tin *tin = &txqi->tin; + spin_lock_bh(&fq->lock); fq_tin_reset(fq, tin, fq_skb_free_func); ieee80211_purge_tx_queue(&local->hw, &txqi->frags); + spin_unlock_bh(&fq->lock); + + spin_lock_bh(&local->active_txq_lock[txqi->txq.ac]); + list_del_init(&txqi->schedule_order); + spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]); } void ieee80211_txq_set_params(struct ieee80211_local *local) @@ -1605,7 +1612,7 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local, ieee80211_txq_enqueue(local, txqi, skb); spin_unlock_bh(&fq->lock); - drv_wake_tx_queue(local, txqi); + schedule_and_wake_txq(local, txqi); return true; } @@ -1938,9 +1945,16 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, int head_need, bool may_encrypt) { struct ieee80211_local *local = sdata->local; + struct ieee80211_hdr *hdr; + bool enc_tailroom; int tail_need = 0; - if (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt) { + hdr = (struct ieee80211_hdr *) skb->data; + enc_tailroom = may_encrypt && + (sdata->crypto_tx_tailroom_needed_cnt || + ieee80211_is_mgmt(hdr->frame_control)); + + if (enc_tailroom) { tail_need = IEEE80211_ENCRYPT_TAILROOM; tail_need -= skb_tailroom(skb); tail_need = max_t(int, tail_need, 0); @@ -1948,8 +1962,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, if (skb_cloned(skb) && (!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) || - !skb_clone_writable(skb, ETH_HLEN) || - (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt))) + !skb_clone_writable(skb, ETH_HLEN) || enc_tailroom)) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if (head_need || tail_need) I802_DEBUG_INC(local->tx_expand_skb_head); @@ -3630,6 +3643,151 @@ out: } EXPORT_SYMBOL(ieee80211_tx_dequeue); +struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct txq_info *txqi = NULL; + + lockdep_assert_held(&local->active_txq_lock[ac]); + + begin: + txqi = list_first_entry_or_null(&local->active_txqs[ac], + struct txq_info, + schedule_order); + if (!txqi) + return NULL; + + if (txqi->txq.sta) { + struct sta_info *sta = container_of(txqi->txq.sta, + struct sta_info, sta); + + if (sta->airtime[txqi->txq.ac].deficit < 0) { + sta->airtime[txqi->txq.ac].deficit += + sta->airtime_weight; + list_move_tail(&txqi->schedule_order, + &local->active_txqs[txqi->txq.ac]); + goto begin; + } + } + + + if (txqi->schedule_round == local->schedule_round[ac]) + return NULL; + + list_del_init(&txqi->schedule_order); + txqi->schedule_round = local->schedule_round[ac]; + return &txqi->txq; +} +EXPORT_SYMBOL(ieee80211_next_txq); + +void ieee80211_return_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct txq_info *txqi = to_txq_info(txq); + + lockdep_assert_held(&local->active_txq_lock[txq->ac]); + + if (list_empty(&txqi->schedule_order) && + (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) { + /* If airtime accounting is active, always enqueue STAs at the + * head of the list to ensure that they only get moved to the + * back by the airtime DRR scheduler once they have a negative + * deficit. A station that already has a negative deficit will + * get immediately moved to the back of the list on the next + * call to ieee80211_next_txq(). + */ + if (txqi->txq.sta && + wiphy_ext_feature_isset(local->hw.wiphy, + NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) + list_add(&txqi->schedule_order, + &local->active_txqs[txq->ac]); + else + list_add_tail(&txqi->schedule_order, + &local->active_txqs[txq->ac]); + } +} +EXPORT_SYMBOL(ieee80211_return_txq); + +void ieee80211_schedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) + __acquires(txq_lock) __releases(txq_lock) +{ + struct ieee80211_local *local = hw_to_local(hw); + + spin_lock_bh(&local->active_txq_lock[txq->ac]); + ieee80211_return_txq(hw, txq); + spin_unlock_bh(&local->active_txq_lock[txq->ac]); +} +EXPORT_SYMBOL(ieee80211_schedule_txq); + +bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct txq_info *iter, *tmp, *txqi = to_txq_info(txq); + struct sta_info *sta; + u8 ac = txq->ac; + + lockdep_assert_held(&local->active_txq_lock[ac]); + + if (!txqi->txq.sta) + goto out; + + if (list_empty(&txqi->schedule_order)) + goto out; + + list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac], + schedule_order) { + if (iter == txqi) + break; + + if (!iter->txq.sta) { + list_move_tail(&iter->schedule_order, + &local->active_txqs[ac]); + continue; + } + sta = container_of(iter->txq.sta, struct sta_info, sta); + if (sta->airtime[ac].deficit < 0) + sta->airtime[ac].deficit += sta->airtime_weight; + list_move_tail(&iter->schedule_order, &local->active_txqs[ac]); + } + + sta = container_of(txqi->txq.sta, struct sta_info, sta); + if (sta->airtime[ac].deficit >= 0) + goto out; + + sta->airtime[ac].deficit += sta->airtime_weight; + list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]); + + return false; +out: + if (!list_empty(&txqi->schedule_order)) + list_del_init(&txqi->schedule_order); + + return true; +} +EXPORT_SYMBOL(ieee80211_txq_may_transmit); + +void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac) + __acquires(txq_lock) +{ + struct ieee80211_local *local = hw_to_local(hw); + + spin_lock_bh(&local->active_txq_lock[ac]); + local->schedule_round[ac]++; +} +EXPORT_SYMBOL(ieee80211_txq_schedule_start); + +void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) + __releases(txq_lock) +{ + struct ieee80211_local *local = hw_to_local(hw); + + spin_unlock_bh(&local->active_txq_lock[ac]); +} +EXPORT_SYMBOL(ieee80211_txq_schedule_end); + void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d0eb38b890aa..ba950ae974fc 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -5,7 +5,7 @@ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -2146,6 +2146,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: break; + case NL80211_IFTYPE_ADHOC: + if (sdata->vif.bss_conf.ibss_joined) + WARN_ON(drv_join_ibss(local, sdata)); + /* fall through */ default: ieee80211_reconfig_stations(sdata); /* fall through */ diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 94f53a9b7d1a..dda8930f20e7 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -183,8 +183,8 @@ static int mpls_build_state(struct nlattr *nla, &n_labels, NULL, extack)) return -EINVAL; - newts = lwtunnel_state_alloc(sizeof(*tun_encap_info) + - n_labels * sizeof(u32)); + newts = lwtunnel_state_alloc(struct_size(tun_encap_info, label, + n_labels)); if (!newts) return -ENOMEM; diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index cad48d07c818..8401cefd9f65 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -29,6 +29,7 @@ config IP_VS_IPV6 bool "IPv6 support for IPVS" depends on IPV6 = y || IP_VS = IPV6 select IP6_NF_IPTABLES + select NF_DEFRAG_IPV6 ---help--- Add IPv6 support to IPVS. diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e969dad66991..43bbaa32b1d6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1566,14 +1566,12 @@ ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, /* sorry, all this trouble for a no-hit :) */ IP_VS_DBG_PKT(12, af, pp, skb, iph->off, "ip_vs_in: packet continues traversal as normal"); - if (iph->fragoffs) { - /* Fragment that couldn't be mapped to a conn entry - * is missing module nf_defrag_ipv6 - */ - IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); + + /* Fragment couldn't be mapped to a conn entry */ + if (iph->fragoffs) IP_VS_DBG_PKT(7, af, pp, skb, iph->off, "unhandled fragment"); - } + *verdict = NF_ACCEPT; return 0; } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 446beeb5e7b2..3c36480559f9 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -43,6 +43,7 @@ #ifdef CONFIG_IP_VS_IPV6 #include <net/ipv6.h> #include <net/ip6_route.h> +#include <net/netfilter/ipv6/nf_defrag_ipv6.h> #endif #include <net/route.h> #include <net/sock.h> @@ -895,6 +896,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, { struct ip_vs_dest *dest; unsigned int atype, i; + int ret = 0; EnterFunction(2); @@ -905,6 +907,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atype & IPV6_ADDR_LINKLOCAL) && !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6)) return -EINVAL; + + ret = nf_defrag_ipv6_enable(svc->ipvs->net); + if (ret) + return ret; } else #endif { @@ -1228,6 +1234,10 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, ret = -EINVAL; goto out_err; } + + ret = nf_defrag_ipv6_enable(ipvs->net); + if (ret) + goto out_err; } #endif @@ -2221,6 +2231,18 @@ static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user u->udp_timeout); #ifdef CONFIG_IP_VS_PROTO_TCP + if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) || + u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) { + return -EINVAL; + } +#endif + +#ifdef CONFIG_IP_VS_PROTO_UDP + if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ)) + return -EINVAL; +#endif + +#ifdef CONFIG_IP_VS_PROTO_TCP if (u->tcp_timeout) { pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 85de2a7b0ede..e139c256e269 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1050,6 +1050,22 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, } if (nf_ct_key_equal(h, tuple, zone, net)) { + /* Tuple is taken already, so caller will need to find + * a new source port to use. + * + * Only exception: + * If the *original tuples* are identical, then both + * conntracks refer to the same flow. + * This is a rare situation, it can occur e.g. when + * more than one UDP packet is sent from same socket + * in different threads. + * + * Let nf_ct_resolve_clash() deal with this later. + */ + if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) + continue; + NF_CT_STAT_INC_ATOMIC(net, found); rcu_read_unlock(); return 1; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7495f29d24e8..de3f1e2acae0 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -131,6 +131,23 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } +static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct net *net = ctx->net; + struct nft_trans *trans; + + if (!nft_set_is_anonymous(set)) + return; + + list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { + if (trans->msg_type == NFT_MSG_NEWSET && + nft_trans_set(trans) == set) { + nft_trans_set_bound(trans) = true; + break; + } + } +} + static int nf_tables_register_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) @@ -226,18 +243,6 @@ static int nft_delchain(struct nft_ctx *ctx) return err; } -/* either expr ops provide both activate/deactivate, or neither */ -static bool nft_expr_check_ops(const struct nft_expr_ops *ops) -{ - if (!ops) - return true; - - if (WARN_ON_ONCE((!ops->activate ^ !ops->deactivate))) - return false; - - return true; -} - static void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule) { @@ -253,14 +258,15 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx, } static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, - struct nft_rule *rule) + struct nft_rule *rule, + enum nft_trans_phase phase) { struct nft_expr *expr; expr = nft_expr_first(rule); while (expr != nft_expr_last(rule) && expr->ops) { if (expr->ops->deactivate) - expr->ops->deactivate(ctx, expr); + expr->ops->deactivate(ctx, expr, phase); expr = nft_expr_next(expr); } @@ -311,7 +317,7 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule) nft_trans_destroy(trans); return err; } - nft_rule_expr_deactivate(ctx, rule); + nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_PREPARE); return 0; } @@ -1972,9 +1978,6 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, */ int nft_register_expr(struct nft_expr_type *type) { - if (!nft_expr_check_ops(type->ops)) - return -EINVAL; - nfnl_lock(NFNL_SUBSYS_NFTABLES); if (type->family == NFPROTO_UNSPEC) list_add_tail_rcu(&type->list, &nf_tables_expressions); @@ -2122,10 +2125,6 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, err = PTR_ERR(ops); goto err1; } - if (!nft_expr_check_ops(ops)) { - err = -EINVAL; - goto err1; - } } else ops = type->ops; @@ -2555,7 +2554,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) { - nft_rule_expr_deactivate(ctx, rule); + nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); nf_tables_rule_destroy(ctx, rule); } @@ -3761,39 +3760,30 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, bind: binding->chain = ctx->chain; list_add_tail_rcu(&binding->list, &set->bindings); + nft_set_trans_bind(ctx, set); + return 0; } EXPORT_SYMBOL_GPL(nf_tables_bind_set); -void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding) -{ - if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) - list_add_tail_rcu(&set->list, &ctx->table->sets); - - list_add_tail_rcu(&binding->list, &set->bindings); -} -EXPORT_SYMBOL_GPL(nf_tables_rebind_set); - void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding) + struct nft_set_binding *binding, bool event) { list_del_rcu(&binding->list); - if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) + if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + if (event) + nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, + GFP_KERNEL); + } } EXPORT_SYMBOL_GPL(nf_tables_unbind_set); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set) { - if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) { - nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC); + if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) nft_set_destroy(set); - } } EXPORT_SYMBOL_GPL(nf_tables_destroy_set); @@ -6622,6 +6612,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_rule_notify(&trans->ctx, nft_trans_rule(trans), NFT_MSG_DELRULE); + nft_rule_expr_deactivate(&trans->ctx, + nft_trans_rule(trans), + NFT_TRANS_COMMIT); break; case NFT_MSG_NEWSET: nft_clear(net, nft_trans_set(trans)); @@ -6708,7 +6701,8 @@ static void nf_tables_abort_release(struct nft_trans *trans) nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_NEWSET: - nft_set_destroy(nft_trans_set(trans)); + if (!nft_trans_set_bound(trans)) + nft_set_destroy(nft_trans_set(trans)); break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), @@ -6769,7 +6763,9 @@ static int __nf_tables_abort(struct net *net) case NFT_MSG_NEWRULE: trans->ctx.chain->use--; list_del_rcu(&nft_trans_rule(trans)->list); - nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans)); + nft_rule_expr_deactivate(&trans->ctx, + nft_trans_rule(trans), + NFT_TRANS_ABORT); break; case NFT_MSG_DELRULE: trans->ctx.chain->use++; @@ -6779,7 +6775,8 @@ static int __nf_tables_abort(struct net *net) break; case NFT_MSG_NEWSET: trans->ctx.table->use--; - list_del_rcu(&nft_trans_set(trans)->list); + if (!nft_trans_set_bound(trans)) + list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: trans->ctx.table->use++; diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 6f41dd74729d..1f1d90c1716b 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -66,6 +66,7 @@ static bool nf_osf_match_one(const struct sk_buff *skb, int ttl_check, struct nf_osf_hdr_ctx *ctx) { + const __u8 *optpinit = ctx->optp; unsigned int check_WSS = 0; int fmatch = FMATCH_WRONG; int foptsize, optnum; @@ -155,6 +156,9 @@ static bool nf_osf_match_one(const struct sk_buff *skb, } } + if (fmatch != FMATCH_OK) + ctx->optp = optpinit; + return fmatch == FMATCH_OK; } diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7334e0b80a5e..0a4bad55a8aa 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -22,11 +22,15 @@ #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_arp/arp_tables.h> #include <net/netfilter/nf_tables.h> +#include <net/netns/generic.h> struct nft_xt { struct list_head head; struct nft_expr_ops ops; - unsigned int refcnt; + refcount_t refcnt; + + /* used only when transaction mutex is locked */ + unsigned int listcnt; /* Unlike other expressions, ops doesn't have static storage duration. * nft core assumes they do. We use kfree_rcu so that nft core can @@ -43,10 +47,39 @@ struct nft_xt_match_priv { void *info; }; +struct nft_compat_net { + struct list_head nft_target_list; + struct list_head nft_match_list; +}; + +static unsigned int nft_compat_net_id __read_mostly; +static struct nft_expr_type nft_match_type; +static struct nft_expr_type nft_target_type; + +static struct nft_compat_net *nft_compat_pernet(struct net *net) +{ + return net_generic(net, nft_compat_net_id); +} + +static void nft_xt_get(struct nft_xt *xt) +{ + /* refcount_inc() warns on 0 -> 1 transition, but we can't + * init the reference count to 1 in .select_ops -- we can't + * undo such an increase when another expression inside the same + * rule fails afterwards. + */ + if (xt->listcnt == 0) + refcount_set(&xt->refcnt, 1); + else + refcount_inc(&xt->refcnt); + + xt->listcnt++; +} + static bool nft_xt_put(struct nft_xt *xt) { - if (--xt->refcnt == 0) { - list_del(&xt->head); + if (refcount_dec_and_test(&xt->refcnt)) { + WARN_ON_ONCE(!list_empty(&xt->head)); kfree_rcu(xt, rcu_head); return true; } @@ -273,7 +306,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; nft_xt = container_of(expr->ops, struct nft_xt, ops); - nft_xt->refcnt++; + nft_xt_get(nft_xt); return 0; } @@ -282,6 +315,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct xt_target *target = expr->ops->data; void *info = nft_expr_priv(expr); + struct module *me = target->me; struct xt_tgdtor_param par; par.net = ctx->net; @@ -292,7 +326,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) par.target->destroy(&par); if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) - module_put(target->me); + module_put(me); } static int nft_extension_dump_info(struct sk_buff *skb, int attr, @@ -486,7 +520,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return ret; nft_xt = container_of(expr->ops, struct nft_xt, ops); - nft_xt->refcnt++; + nft_xt_get(nft_xt); return 0; } @@ -540,6 +574,18 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); } +static void nft_compat_deactivate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + enum nft_trans_phase phase) +{ + struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); + + if (phase == NFT_TRANS_ABORT || phase == NFT_TRANS_COMMIT) { + if (--xt->listcnt == 0) + list_del_init(&xt->head); + } +} + static void nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -734,10 +780,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = { .cb = nfnl_nft_compat_cb, }; -static LIST_HEAD(nft_match_list); - -static struct nft_expr_type nft_match_type; - static bool nft_match_cmp(const struct xt_match *match, const char *name, u32 rev, u32 family) { @@ -749,6 +791,7 @@ static const struct nft_expr_ops * nft_match_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { + struct nft_compat_net *cn; struct nft_xt *nft_match; struct xt_match *match; unsigned int matchsize; @@ -765,8 +808,10 @@ nft_match_select_ops(const struct nft_ctx *ctx, rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV])); family = ctx->family; + cn = nft_compat_pernet(ctx->net); + /* Re-use the existing match if it's already loaded. */ - list_for_each_entry(nft_match, &nft_match_list, head) { + list_for_each_entry(nft_match, &cn->nft_match_list, head) { struct xt_match *match = nft_match->ops.data; if (nft_match_cmp(match, mt_name, rev, family)) @@ -789,11 +834,12 @@ nft_match_select_ops(const struct nft_ctx *ctx, goto err; } - nft_match->refcnt = 0; + refcount_set(&nft_match->refcnt, 0); nft_match->ops.type = &nft_match_type; nft_match->ops.eval = nft_match_eval; nft_match->ops.init = nft_match_init; nft_match->ops.destroy = nft_match_destroy; + nft_match->ops.deactivate = nft_compat_deactivate; nft_match->ops.dump = nft_match_dump; nft_match->ops.validate = nft_match_validate; nft_match->ops.data = match; @@ -810,7 +856,8 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->ops.size = matchsize; - list_add(&nft_match->head, &nft_match_list); + nft_match->listcnt = 0; + list_add(&nft_match->head, &cn->nft_match_list); return &nft_match->ops; err: @@ -826,10 +873,6 @@ static struct nft_expr_type nft_match_type __read_mostly = { .owner = THIS_MODULE, }; -static LIST_HEAD(nft_target_list); - -static struct nft_expr_type nft_target_type; - static bool nft_target_cmp(const struct xt_target *tg, const char *name, u32 rev, u32 family) { @@ -841,6 +884,7 @@ static const struct nft_expr_ops * nft_target_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { + struct nft_compat_net *cn; struct nft_xt *nft_target; struct xt_target *target; char *tg_name; @@ -861,8 +905,9 @@ nft_target_select_ops(const struct nft_ctx *ctx, strcmp(tg_name, "standard") == 0) return ERR_PTR(-EINVAL); + cn = nft_compat_pernet(ctx->net); /* Re-use the existing target if it's already loaded. */ - list_for_each_entry(nft_target, &nft_target_list, head) { + list_for_each_entry(nft_target, &cn->nft_target_list, head) { struct xt_target *target = nft_target->ops.data; if (!target->target) @@ -893,11 +938,12 @@ nft_target_select_ops(const struct nft_ctx *ctx, goto err; } - nft_target->refcnt = 0; + refcount_set(&nft_target->refcnt, 0); nft_target->ops.type = &nft_target_type; nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); nft_target->ops.init = nft_target_init; nft_target->ops.destroy = nft_target_destroy; + nft_target->ops.deactivate = nft_compat_deactivate; nft_target->ops.dump = nft_target_dump; nft_target->ops.validate = nft_target_validate; nft_target->ops.data = target; @@ -907,7 +953,8 @@ nft_target_select_ops(const struct nft_ctx *ctx, else nft_target->ops.eval = nft_target_eval_xt; - list_add(&nft_target->head, &nft_target_list); + nft_target->listcnt = 0; + list_add(&nft_target->head, &cn->nft_target_list); return &nft_target->ops; err: @@ -923,13 +970,74 @@ static struct nft_expr_type nft_target_type __read_mostly = { .owner = THIS_MODULE, }; +static int __net_init nft_compat_init_net(struct net *net) +{ + struct nft_compat_net *cn = nft_compat_pernet(net); + + INIT_LIST_HEAD(&cn->nft_target_list); + INIT_LIST_HEAD(&cn->nft_match_list); + + return 0; +} + +static void __net_exit nft_compat_exit_net(struct net *net) +{ + struct nft_compat_net *cn = nft_compat_pernet(net); + struct nft_xt *xt, *next; + + if (list_empty(&cn->nft_match_list) && + list_empty(&cn->nft_target_list)) + return; + + /* If there was an error that caused nft_xt expr to not be initialized + * fully and noone else requested the same expression later, the lists + * contain 0-refcount entries that still hold module reference. + * + * Clean them here. + */ + mutex_lock(&net->nft.commit_mutex); + list_for_each_entry_safe(xt, next, &cn->nft_target_list, head) { + struct xt_target *target = xt->ops.data; + + list_del_init(&xt->head); + + if (refcount_read(&xt->refcnt)) + continue; + module_put(target->me); + kfree(xt); + } + + list_for_each_entry_safe(xt, next, &cn->nft_match_list, head) { + struct xt_match *match = xt->ops.data; + + list_del_init(&xt->head); + + if (refcount_read(&xt->refcnt)) + continue; + module_put(match->me); + kfree(xt); + } + mutex_unlock(&net->nft.commit_mutex); +} + +static struct pernet_operations nft_compat_net_ops = { + .init = nft_compat_init_net, + .exit = nft_compat_exit_net, + .id = &nft_compat_net_id, + .size = sizeof(struct nft_compat_net), +}; + static int __init nft_compat_module_init(void) { int ret; + ret = register_pernet_subsys(&nft_compat_net_ops); + if (ret < 0) + goto err_target; + ret = nft_register_expr(&nft_match_type); if (ret < 0) - return ret; + goto err_pernet; ret = nft_register_expr(&nft_target_type); if (ret < 0) @@ -942,45 +1050,21 @@ static int __init nft_compat_module_init(void) } return ret; - err_target: nft_unregister_expr(&nft_target_type); err_match: nft_unregister_expr(&nft_match_type); +err_pernet: + unregister_pernet_subsys(&nft_compat_net_ops); return ret; } static void __exit nft_compat_module_exit(void) { - struct nft_xt *xt, *next; - - /* list should be empty here, it can be non-empty only in case there - * was an error that caused nft_xt expr to not be initialized fully - * and noone else requested the same expression later. - * - * In this case, the lists contain 0-refcount entries that still - * hold module reference. - */ - list_for_each_entry_safe(xt, next, &nft_target_list, head) { - struct xt_target *target = xt->ops.data; - - if (WARN_ON_ONCE(xt->refcnt)) - continue; - module_put(target->me); - kfree(xt); - } - - list_for_each_entry_safe(xt, next, &nft_match_list, head) { - struct xt_match *match = xt->ops.data; - - if (WARN_ON_ONCE(xt->refcnt)) - continue; - module_put(match->me); - kfree(xt); - } nfnetlink_subsys_unregister(&nfnl_compat_subsys); nft_unregister_expr(&nft_target_type); nft_unregister_expr(&nft_match_type); + unregister_pernet_subsys(&nft_compat_net_ops); } MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT); diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 9658493d37d4..a8a74a16f9c4 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -234,20 +234,17 @@ err1: return err; } -static void nft_dynset_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_dynset *priv = nft_expr_priv(expr); - - nf_tables_rebind_set(ctx, priv->set, &priv->binding); -} - static void nft_dynset_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_dynset *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (phase == NFT_TRANS_PREPARE) + return; + + nf_tables_unbind_set(ctx, priv->set, &priv->binding, + phase == NFT_TRANS_COMMIT); } static void nft_dynset_destroy(const struct nft_ctx *ctx, @@ -295,7 +292,6 @@ static const struct nft_expr_ops nft_dynset_ops = { .eval = nft_dynset_eval, .init = nft_dynset_init, .destroy = nft_dynset_destroy, - .activate = nft_dynset_activate, .deactivate = nft_dynset_deactivate, .dump = nft_dynset_dump, }; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 3e5ed787b1d4..5ec43124cbca 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -72,10 +72,14 @@ static void nft_immediate_activate(const struct nft_ctx *ctx, } static void nft_immediate_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); + if (phase == NFT_TRANS_COMMIT) + return; + return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg)); } diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 227b2b15a19c..14496da5141d 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -121,20 +121,17 @@ static int nft_lookup_init(const struct nft_ctx *ctx, return 0; } -static void nft_lookup_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_lookup *priv = nft_expr_priv(expr); - - nf_tables_rebind_set(ctx, priv->set, &priv->binding); -} - static void nft_lookup_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_lookup *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (phase == NFT_TRANS_PREPARE) + return; + + nf_tables_unbind_set(ctx, priv->set, &priv->binding, + phase == NFT_TRANS_COMMIT); } static void nft_lookup_destroy(const struct nft_ctx *ctx, @@ -225,7 +222,6 @@ static const struct nft_expr_ops nft_lookup_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), .eval = nft_lookup_eval, .init = nft_lookup_init, - .activate = nft_lookup_activate, .deactivate = nft_lookup_deactivate, .destroy = nft_lookup_destroy, .dump = nft_lookup_dump, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index c1f2adf198a0..79ef074c18ca 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -156,20 +156,17 @@ nla_put_failure: return -1; } -static void nft_objref_map_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_objref_map *priv = nft_expr_priv(expr); - - nf_tables_rebind_set(ctx, priv->set, &priv->binding); -} - static void nft_objref_map_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_objref_map *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (phase == NFT_TRANS_PREPARE) + return; + + nf_tables_unbind_set(ctx, priv->set, &priv->binding, + phase == NFT_TRANS_COMMIT); } static void nft_objref_map_destroy(const struct nft_ctx *ctx, @@ -186,7 +183,6 @@ static const struct nft_expr_ops nft_objref_map_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)), .eval = nft_objref_map_eval, .init = nft_objref_map_init, - .activate = nft_objref_map_activate, .deactivate = nft_objref_map_deactivate, .destroy = nft_objref_map_destroy, .dump = nft_objref_map_dump, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index aecadd471e1d..13e1ac333fa4 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1899,7 +1899,7 @@ static int __init xt_init(void) seqcount_init(&per_cpu(xt_recseq, i)); } - xt = kmalloc_array(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL); + xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL); if (!xt) return -ENOMEM; diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index cbd51ed5a2d7..908e53ab47a4 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -52,21 +52,21 @@ void nr_start_t1timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); - mod_timer(&nr->t1timer, jiffies + nr->t1); + sk_reset_timer(sk, &nr->t1timer, jiffies + nr->t1); } void nr_start_t2timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); - mod_timer(&nr->t2timer, jiffies + nr->t2); + sk_reset_timer(sk, &nr->t2timer, jiffies + nr->t2); } void nr_start_t4timer(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); - mod_timer(&nr->t4timer, jiffies + nr->t4); + sk_reset_timer(sk, &nr->t4timer, jiffies + nr->t4); } void nr_start_idletimer(struct sock *sk) @@ -74,37 +74,37 @@ void nr_start_idletimer(struct sock *sk) struct nr_sock *nr = nr_sk(sk); if (nr->idle > 0) - mod_timer(&nr->idletimer, jiffies + nr->idle); + sk_reset_timer(sk, &nr->idletimer, jiffies + nr->idle); } void nr_start_heartbeat(struct sock *sk) { - mod_timer(&sk->sk_timer, jiffies + 5 * HZ); + sk_reset_timer(sk, &sk->sk_timer, jiffies + 5 * HZ); } void nr_stop_t1timer(struct sock *sk) { - del_timer(&nr_sk(sk)->t1timer); + sk_stop_timer(sk, &nr_sk(sk)->t1timer); } void nr_stop_t2timer(struct sock *sk) { - del_timer(&nr_sk(sk)->t2timer); + sk_stop_timer(sk, &nr_sk(sk)->t2timer); } void nr_stop_t4timer(struct sock *sk) { - del_timer(&nr_sk(sk)->t4timer); + sk_stop_timer(sk, &nr_sk(sk)->t4timer); } void nr_stop_idletimer(struct sock *sk) { - del_timer(&nr_sk(sk)->idletimer); + sk_stop_timer(sk, &nr_sk(sk)->idletimer); } void nr_stop_heartbeat(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); } int nr_t1timer_running(struct sock *sk) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3b1a78906bc0..1cd1d83a4be0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4292,7 +4292,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, rb->frames_per_block = req->tp_block_size / req->tp_frame_size; if (unlikely(rb->frames_per_block == 0)) goto out; - if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr)) + if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr)) goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 65387e1e6964..d6cc97fbbbb0 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -254,7 +254,40 @@ static __poll_t rds_poll(struct file *file, struct socket *sock, static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { - return -ENOIOCTLCMD; + struct rds_sock *rs = rds_sk_to_rs(sock->sk); + rds_tos_t utos, tos = 0; + + switch (cmd) { + case SIOCRDSSETTOS: + if (get_user(utos, (rds_tos_t __user *)arg)) + return -EFAULT; + + if (rs->rs_transport && + rs->rs_transport->get_tos_map) + tos = rs->rs_transport->get_tos_map(utos); + else + return -ENOIOCTLCMD; + + spin_lock_bh(&rds_sock_lock); + if (rs->rs_tos || rs->rs_conn) { + spin_unlock_bh(&rds_sock_lock); + return -EINVAL; + } + rs->rs_tos = tos; + spin_unlock_bh(&rds_sock_lock); + break; + case SIOCRDSGETTOS: + spin_lock_bh(&rds_sock_lock); + tos = rs->rs_tos; + spin_unlock_bh(&rds_sock_lock); + if (put_user(tos, (rds_tos_t __user *)arg)) + return -EFAULT; + break; + default: + return -ENOIOCTLCMD; + } + + return 0; } static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, @@ -348,7 +381,7 @@ static int rds_set_transport(struct rds_sock *rs, char __user *optval, } static int rds_enable_recvtstamp(struct sock *sk, char __user *optval, - int optlen) + int optlen, int optname) { int val, valbool; @@ -360,6 +393,9 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval, valbool = val ? 1 : 0; + if (optname == SO_TIMESTAMP_NEW) + sock_set_flag(sk, SOCK_TSTAMP_NEW); + if (valbool) sock_set_flag(sk, SOCK_RCVTSTAMP); else @@ -430,9 +466,10 @@ static int rds_setsockopt(struct socket *sock, int level, int optname, ret = rds_set_transport(rs, optval, optlen); release_sock(sock->sk); break; - case SO_TIMESTAMP: + case SO_TIMESTAMP_OLD: + case SO_TIMESTAMP_NEW: lock_sock(sock->sk); - ret = rds_enable_recvtstamp(sock->sk, optval, optlen); + ret = rds_enable_recvtstamp(sock->sk, optval, optlen, optname); release_sock(sock->sk); break; case SO_RDS_MSG_RXPATH_LATENCY: @@ -646,6 +683,8 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol) spin_lock_init(&rs->rs_rdma_lock); rs->rs_rdma_keys = RB_ROOT; rs->rs_rx_traces = 0; + rs->rs_tos = 0; + rs->rs_conn = NULL; spin_lock_bh(&rds_sock_lock); list_add_tail(&rs->rs_item, &rds_sock_list); diff --git a/net/rds/bind.c b/net/rds/bind.c index 762d2c6788a3..17c9d9f0c848 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -78,10 +78,10 @@ struct rds_sock *rds_find_bound(const struct in6_addr *addr, __be16 port, __rds_create_bind_key(key, addr, port, scope_id); rcu_read_lock(); rs = rhashtable_lookup(&bind_hash_table, key, ht_parms); - if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) - rds_sock_addref(rs); - else + if (rs && (sock_flag(rds_rs_to_sk(rs), SOCK_DEAD) || + !refcount_inc_not_zero(&rds_rs_to_sk(rs)->sk_refcnt))) rs = NULL; + rcu_read_unlock(); rdsdebug("returning rs %p for %pI6c:%u\n", rs, addr, diff --git a/net/rds/connection.c b/net/rds/connection.c index 3bd2f4a5a30d..7ea134f9a825 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -84,7 +84,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, struct rds_transport *trans, - int dev_if) + u8 tos, int dev_if) { struct rds_connection *conn, *ret = NULL; @@ -92,6 +92,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net, if (ipv6_addr_equal(&conn->c_faddr, faddr) && ipv6_addr_equal(&conn->c_laddr, laddr) && conn->c_trans == trans && + conn->c_tos == tos && net == rds_conn_net(conn) && conn->c_dev_if == dev_if) { ret = conn; @@ -139,6 +140,7 @@ static void __rds_conn_path_init(struct rds_connection *conn, atomic_set(&cp->cp_state, RDS_CONN_DOWN); cp->cp_send_gen = 0; cp->cp_reconnect_jiffies = 0; + cp->cp_conn->c_proposed_version = RDS_PROTOCOL_VERSION; INIT_DELAYED_WORK(&cp->cp_send_w, rds_send_worker); INIT_DELAYED_WORK(&cp->cp_recv_w, rds_recv_worker); INIT_DELAYED_WORK(&cp->cp_conn_w, rds_connect_worker); @@ -159,7 +161,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, struct rds_transport *trans, - gfp_t gfp, + gfp_t gfp, u8 tos, int is_outgoing, int dev_if) { @@ -171,7 +173,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1); rcu_read_lock(); - conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if); + conn = rds_conn_lookup(net, head, laddr, faddr, trans, tos, dev_if); if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && @@ -205,6 +207,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, conn->c_isv6 = !ipv6_addr_v4mapped(laddr); conn->c_faddr = *faddr; conn->c_dev_if = dev_if; + conn->c_tos = tos; #if IS_ENABLED(CONFIG_IPV6) /* If the local address is link local, set c_bound_if to be the @@ -297,7 +300,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, struct rds_connection *found; found = rds_conn_lookup(net, head, laddr, faddr, trans, - dev_if); + tos, dev_if); if (found) { struct rds_conn_path *cp; int i; @@ -332,10 +335,10 @@ out: struct rds_connection *rds_conn_create(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, - struct rds_transport *trans, gfp_t gfp, - int dev_if) + struct rds_transport *trans, u8 tos, + gfp_t gfp, int dev_if) { - return __rds_conn_create(net, laddr, faddr, trans, gfp, 0, dev_if); + return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 0, dev_if); } EXPORT_SYMBOL_GPL(rds_conn_create); @@ -343,9 +346,9 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, struct rds_transport *trans, - gfp_t gfp, int dev_if) + u8 tos, gfp_t gfp, int dev_if) { - return __rds_conn_create(net, laddr, faddr, trans, gfp, 1, dev_if); + return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 1, dev_if); } EXPORT_SYMBOL_GPL(rds_conn_create_outgoing); diff --git a/net/rds/ib.c b/net/rds/ib.c index 9d7b7586f240..2da9b75bad16 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -301,6 +301,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, iinfo->src_addr = conn->c_laddr.s6_addr32[3]; iinfo->dst_addr = conn->c_faddr.s6_addr32[3]; + iinfo->tos = conn->c_tos; memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid)); memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); @@ -514,6 +515,15 @@ void rds_ib_exit(void) rds_ib_mr_exit(); } +static u8 rds_ib_get_tos_map(u8 tos) +{ + /* 1:1 user to transport map for RDMA transport. + * In future, if custom map is desired, hook can export + * user configurable map. + */ + return tos; +} + struct rds_transport rds_ib_transport = { .laddr_check = rds_ib_laddr_check, .xmit_path_complete = rds_ib_xmit_path_complete, @@ -536,6 +546,7 @@ struct rds_transport rds_ib_transport = { .sync_mr = rds_ib_sync_mr, .free_mr = rds_ib_free_mr, .flush_mrs = rds_ib_flush_mrs, + .get_tos_map = rds_ib_get_tos_map, .t_owner = THIS_MODULE, .t_name = "infiniband", .t_unloading = rds_ib_is_unloading, diff --git a/net/rds/ib.h b/net/rds/ib.h index 71ff356ee702..752f92235a38 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -67,7 +67,9 @@ struct rds_ib_conn_priv_cmn { u8 ricpc_protocol_major; u8 ricpc_protocol_minor; __be16 ricpc_protocol_minor_mask; /* bitmask */ - __be32 ricpc_reserved1; + u8 ricpc_dp_toss; + u8 ripc_reserved1; + __be16 ripc_reserved2; __be64 ricpc_ack_seq; __be32 ricpc_credit; /* non-zero enables flow ctl */ }; diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index bfbb31f0c7fd..66c6eb56072b 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -133,23 +133,24 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even rds_ib_set_flow_control(conn, be32_to_cpu(credit)); } - if (conn->c_version < RDS_PROTOCOL(3, 1)) { - pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n", - &conn->c_laddr, &conn->c_faddr, - RDS_PROTOCOL_MAJOR(conn->c_version), - RDS_PROTOCOL_MINOR(conn->c_version)); - set_bit(RDS_DESTROY_PENDING, &conn->c_path[0].cp_flags); - rds_conn_destroy(conn); - return; - } else { - pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c> version %u.%u%s\n", - ic->i_active_side ? "Active" : "Passive", - &conn->c_laddr, &conn->c_faddr, - RDS_PROTOCOL_MAJOR(conn->c_version), - RDS_PROTOCOL_MINOR(conn->c_version), - ic->i_flowctl ? ", flow control" : ""); + if (conn->c_version < RDS_PROTOCOL_VERSION) { + if (conn->c_version != RDS_PROTOCOL_COMPAT_VERSION) { + pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n", + &conn->c_laddr, &conn->c_faddr, + RDS_PROTOCOL_MAJOR(conn->c_version), + RDS_PROTOCOL_MINOR(conn->c_version)); + rds_conn_destroy(conn); + return; + } } + pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c,%d> version %u.%u%s\n", + ic->i_active_side ? "Active" : "Passive", + &conn->c_laddr, &conn->c_faddr, conn->c_tos, + RDS_PROTOCOL_MAJOR(conn->c_version), + RDS_PROTOCOL_MINOR(conn->c_version), + ic->i_flowctl ? ", flow control" : ""); + atomic_set(&ic->i_cq_quiesce, 0); /* Init rings and fill recv. this needs to wait until protocol @@ -184,6 +185,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even NULL); } + conn->c_proposed_version = conn->c_version; rds_connect_complete(conn); } @@ -220,6 +222,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); dp->ricp_v6.dp_ack_seq = cpu_to_be64(rds_ib_piggyb_ack(ic)); + dp->ricp_v6.dp_cmn.ricpc_dp_toss = conn->c_tos; conn_param->private_data = &dp->ricp_v6; conn_param->private_data_len = sizeof(dp->ricp_v6); @@ -234,6 +237,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); dp->ricp_v4.dp_ack_seq = cpu_to_be64(rds_ib_piggyb_ack(ic)); + dp->ricp_v4.dp_cmn.ricpc_dp_toss = conn->c_tos; conn_param->private_data = &dp->ricp_v4; conn_param->private_data_len = sizeof(dp->ricp_v4); @@ -389,10 +393,9 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data) rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); break; default: - rdsdebug("Fatal QP Event %u (%s) " - "- connection %pI6c->%pI6c, reconnecting\n", - event->event, ib_event_msg(event->event), - &conn->c_laddr, &conn->c_faddr); + rdsdebug("Fatal QP Event %u (%s) - connection %pI6c->%pI6c, reconnecting\n", + event->event, ib_event_msg(event->event), + &conn->c_laddr, &conn->c_faddr); rds_conn_drop(conn); break; } @@ -660,13 +663,16 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6) /* Even if len is crap *now* I still want to check it. -ASG */ if (event->param.conn.private_data_len < data_len || major == 0) - return RDS_PROTOCOL_3_0; + return RDS_PROTOCOL_4_0; common = be16_to_cpu(mask) & RDS_IB_SUPPORTED_PROTOCOLS; - if (major == 3 && common) { - version = RDS_PROTOCOL_3_0; + if (major == 4 && common) { + version = RDS_PROTOCOL_4_0; while ((common >>= 1) != 0) version++; + } else if (RDS_PROTOCOL_COMPAT_VERSION == + RDS_PROTOCOL(major, minor)) { + version = RDS_PROTOCOL_COMPAT_VERSION; } else { if (isv6) printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI6c using incompatible protocol version %u.%u\n", @@ -729,8 +735,10 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, /* Check whether the remote protocol version matches ours. */ version = rds_ib_protocol_compatible(event, isv6); - if (!version) + if (!version) { + err = RDS_RDMA_REJ_INCOMPAT; goto out; + } dp = event->param.conn.private_data; if (isv6) { @@ -771,15 +779,16 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, daddr6 = &d_mapped_addr; } - rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid " - "0x%llx\n", saddr6, daddr6, - RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version), + rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid 0x%llx, tos:%d\n", + saddr6, daddr6, RDS_PROTOCOL_MAJOR(version), + RDS_PROTOCOL_MINOR(version), (unsigned long long)be64_to_cpu(lguid), - (unsigned long long)be64_to_cpu(fguid)); + (unsigned long long)be64_to_cpu(fguid), dp_cmn->ricpc_dp_toss); /* RDS/IB is not currently netns aware, thus init_net */ conn = rds_conn_create(&init_net, daddr6, saddr6, - &rds_ib_transport, GFP_KERNEL, ifindex); + &rds_ib_transport, dp_cmn->ricpc_dp_toss, + GFP_KERNEL, ifindex); if (IS_ERR(conn)) { rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn)); conn = NULL; @@ -846,7 +855,7 @@ out: if (conn) mutex_unlock(&conn->c_cm_lock); if (err) - rdma_reject(cm_id, NULL, 0); + rdma_reject(cm_id, &err, sizeof(int)); return destroy; } @@ -861,7 +870,7 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6) /* If the peer doesn't do protocol negotiation, we must * default to RDSv3.0 */ - rds_ib_set_protocol(conn, RDS_PROTOCOL_3_0); + rds_ib_set_protocol(conn, RDS_PROTOCOL_4_1); ic->i_flowctl = rds_ib_sysctl_flow_control; /* advertise flow control */ ret = rds_ib_setup_qp(conn); @@ -870,7 +879,8 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6) goto out; } - rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION, + rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, + conn->c_proposed_version, UINT_MAX, UINT_MAX, isv6); ret = rdma_connect(cm_id, &conn_param); if (ret) diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 2f16146e4ec9..d395eec98959 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -986,9 +986,9 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, } else { /* We expect errors as the qp is drained during shutdown */ if (rds_conn_up(conn) || rds_conn_connecting(conn)) - rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n", + rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), disconnecting and reconnecting\n", &conn->c_laddr, &conn->c_faddr, - wc->status, + conn->c_tos, wc->status, ib_wc_status_msg(wc->status)); } diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 4e0c36acf866..09c46f2e97fa 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -305,8 +305,9 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) /* We expect errors as the qp is drained during shutdown */ if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) { - rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n", - &conn->c_laddr, &conn->c_faddr, wc->status, + rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), disconnecting and reconnecting\n", + &conn->c_laddr, &conn->c_faddr, + conn->c_tos, wc->status, ib_wc_status_msg(wc->status)); } } diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 6b0f57c83a2a..46bce8389066 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -51,6 +51,8 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, struct rds_connection *conn = cm_id->context; struct rds_transport *trans; int ret = 0; + int *err; + u8 len; rdsdebug("conn %p id %p handling event %u (%s)\n", conn, cm_id, event->event, rdma_event_msg(event->event)); @@ -81,6 +83,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_ADDR_RESOLVED: + rdma_set_service_type(cm_id, conn->c_tos); /* XXX do we need to clean up if this fails? */ ret = rdma_resolve_route(cm_id, RDS_RDMA_RESOLVE_TIMEOUT_MS); @@ -106,8 +109,19 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, break; case RDMA_CM_EVENT_REJECTED: + if (!conn) + break; + err = (int *)rdma_consumer_reject_data(cm_id, event, &len); + if (!err || (err && ((*err) == RDS_RDMA_REJ_INCOMPAT))) { + pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n", + &conn->c_laddr, &conn->c_faddr); + conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION; + conn->c_tos = 0; + rds_conn_drop(conn); + } rdsdebug("Connection rejected: %s\n", rdma_reject_msg(cm_id, event->status)); + break; /* FALLTHROUGH */ case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR: diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h index 200d3134aaae..bfafd4a6d827 100644 --- a/net/rds/rdma_transport.h +++ b/net/rds/rdma_transport.h @@ -11,6 +11,12 @@ #define RDS_RDMA_RESOLVE_TIMEOUT_MS 5000 +/* Below reject reason is for legacy interoperability issue with non-linux + * RDS endpoints where older version incompatibility is conveyed via value 1. + * For future version(s), proper encoded reject reason should be be used. + */ +#define RDS_RDMA_REJ_INCOMPAT 1 + int rds_rdma_conn_connect(struct rds_connection *conn); int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event); diff --git a/net/rds/rds.h b/net/rds/rds.h index 4ffe100ff5e6..0d8f67cadd74 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -19,10 +19,13 @@ */ #define RDS_PROTOCOL_3_0 0x0300 #define RDS_PROTOCOL_3_1 0x0301 +#define RDS_PROTOCOL_4_0 0x0400 +#define RDS_PROTOCOL_4_1 0x0401 #define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1 #define RDS_PROTOCOL_MAJOR(v) ((v) >> 8) #define RDS_PROTOCOL_MINOR(v) ((v) & 255) #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) +#define RDS_PROTOCOL_COMPAT_VERSION RDS_PROTOCOL_3_1 /* The following ports, 16385, 18634, 18635, are registered with IANA as * the ports to be used for RDS over TCP and UDP. Currently, only RDS over @@ -151,9 +154,13 @@ struct rds_connection { struct rds_cong_map *c_fcong; /* Protocol version */ + unsigned int c_proposed_version; unsigned int c_version; possible_net_t c_net; + /* TOS */ + u8 c_tos; + struct list_head c_map_item; unsigned long c_map_queued; @@ -567,6 +574,7 @@ struct rds_transport { void (*free_mr)(void *trans_private, int invalidate); void (*flush_mrs)(void); bool (*t_unloading)(struct rds_connection *conn); + u8 (*get_tos_map)(u8 tos); }; /* Bind hash table key length. It is the sum of the size of a struct @@ -648,6 +656,7 @@ struct rds_sock { u8 rs_rx_traces; u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; struct rds_msg_zcopy_queue rs_zcookie_queue; + u8 rs_tos; }; static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk) @@ -756,13 +765,14 @@ void rds_conn_exit(void); struct rds_connection *rds_conn_create(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, - struct rds_transport *trans, gfp_t gfp, + struct rds_transport *trans, + u8 tos, gfp_t gfp, int dev_if); struct rds_connection *rds_conn_create_outgoing(struct net *net, const struct in6_addr *laddr, const struct in6_addr *faddr, struct rds_transport *trans, - gfp_t gfp, int dev_if); + u8 tos, gfp_t gfp, int dev_if); void rds_conn_shutdown(struct rds_conn_path *cpath); void rds_conn_destroy(struct rds_connection *conn); void rds_conn_drop(struct rds_connection *conn); diff --git a/net/rds/recv.c b/net/rds/recv.c index 727639dac8a7..853de4876088 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -549,9 +549,21 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg, if ((inc->i_rx_tstamp != 0) && sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) { - struct timeval tv = ktime_to_timeval(inc->i_rx_tstamp); - ret = put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, - sizeof(tv), &tv); + struct __kernel_old_timeval tv = ns_to_kernel_old_timeval(inc->i_rx_tstamp); + + if (!sock_flag(rds_rs_to_sk(rs), SOCK_TSTAMP_NEW)) { + ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, + sizeof(tv), &tv); + } else { + struct __kernel_sock_timeval sk_tv; + + sk_tv.tv_sec = tv.tv_sec; + sk_tv.tv_usec = tv.tv_usec; + + ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW, + sizeof(sk_tv), &sk_tv); + } + if (ret) goto out; } @@ -770,6 +782,7 @@ void rds_inc_info_copy(struct rds_incoming *inc, minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence); minfo.len = be32_to_cpu(inc->i_hdr.h_len); + minfo.tos = inc->i_conn->c_tos; if (flip) { minfo.laddr = daddr; diff --git a/net/rds/send.c b/net/rds/send.c index fd8b687d5c05..166dd578c1cc 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1277,12 +1277,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* rds_conn_create has a spinlock that runs with IRQ off. * Caching the conn in the socket helps a lot. */ - if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr)) + if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr) && + rs->rs_tos == rs->rs_conn->c_tos) { conn = rs->rs_conn; - else { + } else { conn = rds_conn_create_outgoing(sock_net(sock->sk), &rs->rs_bound_addr, &daddr, - rs->rs_transport, + rs->rs_transport, rs->rs_tos, sock->sk->sk_allocation, scope_id); if (IS_ERR(conn)) { diff --git a/net/rds/tcp.c b/net/rds/tcp.c index c16f0a362c32..fd2694174607 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -267,6 +267,7 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, tsinfo.last_sent_nxt = tc->t_last_sent_nxt; tsinfo.last_expected_una = tc->t_last_expected_una; tsinfo.last_seen_una = tc->t_last_seen_una; + tsinfo.tos = tc->t_cpath->cp_conn->c_tos; rds_info_copy(iter, &tsinfo, sizeof(tsinfo)); } @@ -452,6 +453,12 @@ static void rds_tcp_destroy_conns(void) static void rds_tcp_exit(void); +static u8 rds_tcp_get_tos_map(u8 tos) +{ + /* all user tos mapped to default 0 for TCP transport */ + return 0; +} + struct rds_transport rds_tcp_transport = { .laddr_check = rds_tcp_laddr_check, .xmit_path_prepare = rds_tcp_xmit_path_prepare, @@ -466,6 +473,7 @@ struct rds_transport rds_tcp_transport = { .inc_free = rds_tcp_inc_free, .stats_info_copy = rds_tcp_stats_info_copy, .exit = rds_tcp_exit, + .get_tos_map = rds_tcp_get_tos_map, .t_owner = THIS_MODULE, .t_name = "tcp", .t_type = RDS_TRANS_TCP, diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index c12203f646da..810a3a49e947 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -200,7 +200,7 @@ int rds_tcp_accept_one(struct socket *sock) conn = rds_conn_create(sock_net(sock->sk), my_addr, peer_addr, - &rds_tcp_transport, GFP_KERNEL, dev_if); + &rds_tcp_transport, 0, GFP_KERNEL, dev_if); if (IS_ERR(conn)) { ret = PTR_ERR(conn); diff --git a/net/rds/threads.c b/net/rds/threads.c index e64f9e4c3cda..32dc50f0a303 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -93,6 +93,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr) queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); } rcu_read_unlock(); + cp->cp_conn->c_proposed_version = RDS_PROTOCOL_VERSION; } EXPORT_SYMBOL_GPL(rds_connect_path_complete); diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 77e9f85a2c92..f2ff21d7df08 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -850,6 +850,7 @@ void rose_link_device_down(struct net_device *dev) /* * Route a frame to an appropriate AX.25 connection. + * A NULL ax25_cb indicates an internally generated frame. */ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) { @@ -867,6 +868,10 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) if (skb->len < ROSE_MIN_LEN) return res; + + if (!ax25) + return rose_loopback_queue(skb, NULL); + frametype = skb->data[2]; lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); if (frametype == ROSE_CALL_REQUEST && diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 0906e51d3cfb..15cf42d5b53a 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -202,7 +202,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) /* We want receive timestamps. */ opt = 1; - ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS, + ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS_OLD, (char *)&opt, sizeof(opt)); if (ret < 0) { _debug("setsockopt failed"); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index eaf19ebaa964..3f7bb11f3290 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -596,6 +596,7 @@ error_requeue_call: } error_no_call: release_sock(&rx->sk); +error_trace: trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); return ret; @@ -604,7 +605,7 @@ wait_interrupted: wait_error: finish_wait(sk_sleep(&rx->sk), &wait); call = NULL; - goto error_no_call; + goto error_trace; } /** diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d4b8355737d8..aecf1bf233c8 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -543,7 +543,7 @@ int tcf_register_action(struct tc_action_ops *act, write_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { - if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { + if (act->id == a->id || (strcmp(act->kind, a->kind) == 0)) { write_unlock(&act_mod_lock); unregister_pernet_subsys(ops); return -EEXIST; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index c7633843e223..aa5c38d11a30 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -396,7 +396,7 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_bpf_ops __read_mostly = { .kind = "bpf", - .type = TCA_ACT_BPF, + .id = TCA_ID_BPF, .owner = THIS_MODULE, .act = tcf_bpf_act, .dump = tcf_bpf_dump, diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 8475913f2070..5d24993cccfe 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -204,7 +204,7 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_connmark_ops = { .kind = "connmark", - .type = TCA_ACT_CONNMARK, + .id = TCA_ID_CONNMARK, .owner = THIS_MODULE, .act = tcf_connmark_act, .dump = tcf_connmark_dump, diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 3dc25b7806d7..945fb34ae721 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -660,7 +660,7 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act) static struct tc_action_ops act_csum_ops = { .kind = "csum", - .type = TCA_ACT_CSUM, + .id = TCA_ID_CSUM, .owner = THIS_MODULE, .act = tcf_csum_act, .dump = tcf_csum_dump, diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index b61c20ebb314..93da0004e9f4 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -253,7 +253,7 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act) static struct tc_action_ops act_gact_ops = { .kind = "gact", - .type = TCA_ACT_GACT, + .id = TCA_ID_GACT, .owner = THIS_MODULE, .act = tcf_gact_act, .stats_update = tcf_gact_stats_update, diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 30b63fa23ee2..9b1f2b3990ee 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -864,7 +864,7 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_ife_ops = { .kind = "ife", - .type = TCA_ACT_IFE, + .id = TCA_ID_IFE, .owner = THIS_MODULE, .act = tcf_ife_act, .dump = tcf_ife_dump, diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 8af6c11d2482..1bad190710ad 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -338,7 +338,7 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_ipt_ops = { .kind = "ipt", - .type = TCA_ACT_IPT, + .id = TCA_ID_IPT, .owner = THIS_MODULE, .act = tcf_ipt_act, .dump = tcf_ipt_dump, @@ -387,7 +387,7 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_xt_ops = { .kind = "xt", - .type = TCA_ACT_XT, + .id = TCA_ID_XT, .owner = THIS_MODULE, .act = tcf_ipt_act, .dump = tcf_ipt_dump, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index c8cf4d10c435..6692fd054617 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -400,7 +400,7 @@ static void tcf_mirred_put_dev(struct net_device *dev) static struct tc_action_ops act_mirred_ops = { .kind = "mirred", - .type = TCA_ACT_MIRRED, + .id = TCA_ID_MIRRED, .owner = THIS_MODULE, .act = tcf_mirred_act, .stats_update = tcf_stats_update, diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index c5c1e23add77..543eab9193f1 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -304,7 +304,7 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_nat_ops = { .kind = "nat", - .type = TCA_ACT_NAT, + .id = TCA_ID_NAT, .owner = THIS_MODULE, .act = tcf_nat_act, .dump = tcf_nat_dump, diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 2b372a06b432..a80373878df7 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -406,7 +406,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, struct tcf_t t; int s; - s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key); + s = struct_size(opt, keys, p->tcfp_nkeys); /* netlink spinlocks held above us - must use ATOMIC */ opt = kzalloc(s, GFP_ATOMIC); @@ -470,7 +470,7 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_pedit_ops = { .kind = "pedit", - .type = TCA_ACT_PEDIT, + .id = TCA_ID_PEDIT, .owner = THIS_MODULE, .act = tcf_pedit_act, .dump = tcf_pedit_dump, diff --git a/net/sched/act_police.c b/net/sched/act_police.c index ec8ec55e0fe8..8271a6263824 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -366,7 +366,7 @@ MODULE_LICENSE("GPL"); static struct tc_action_ops act_police_ops = { .kind = "police", - .type = TCA_ID_POLICE, + .id = TCA_ID_POLICE, .owner = THIS_MODULE, .act = tcf_police_act, .dump = tcf_police_dump, diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 1a0c682fd734..203e399e5c85 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -233,7 +233,7 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_sample_ops = { .kind = "sample", - .type = TCA_ACT_SAMPLE, + .id = TCA_ID_SAMPLE, .owner = THIS_MODULE, .act = tcf_sample_act, .dump = tcf_sample_dump, diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 902957beceb3..d54cb608dbaf 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -19,8 +19,6 @@ #include <net/netlink.h> #include <net/pkt_sched.h> -#define TCA_ACT_SIMP 22 - #include <linux/tc_act/tc_defact.h> #include <net/tc_act/tc_defact.h> @@ -197,7 +195,7 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_simp_ops = { .kind = "simple", - .type = TCA_ACT_SIMP, + .id = TCA_ID_SIMP, .owner = THIS_MODULE, .act = tcf_simp_act, .dump = tcf_simp_dump, diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 64dba3708fce..39f8a67ea940 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -305,7 +305,7 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_skbedit_ops = { .kind = "skbedit", - .type = TCA_ACT_SKBEDIT, + .id = TCA_ID_SKBEDIT, .owner = THIS_MODULE, .act = tcf_skbedit_act, .dump = tcf_skbedit_dump, diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 59710a183bd3..7bac1d78e7a3 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -260,7 +260,7 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_skbmod_ops = { .kind = "skbmod", - .type = TCA_ACT_SKBMOD, + .id = TCA_ACT_SKBMOD, .owner = THIS_MODULE, .act = tcf_skbmod_act, .dump = tcf_skbmod_dump, diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 8b43fe0130f7..9104b8e36482 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -563,7 +563,7 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_tunnel_key_ops = { .kind = "tunnel_key", - .type = TCA_ACT_TUNNEL_KEY, + .id = TCA_ID_TUNNEL_KEY, .owner = THIS_MODULE, .act = tunnel_key_act, .dump = tunnel_key_dump, diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 93fdaf707313..ac0061599225 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -297,7 +297,7 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) static struct tc_action_ops act_vlan_ops = { .kind = "vlan", - .type = TCA_ACT_VLAN, + .id = TCA_ID_VLAN, .owner = THIS_MODULE, .act = tcf_vlan_act, .dump = tcf_vlan_dump, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e2b5cb2eb34e..9ad53895e604 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -31,6 +31,13 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> +#include <net/tc_act/tc_pedit.h> +#include <net/tc_act/tc_mirred.h> +#include <net/tc_act/tc_vlan.h> +#include <net/tc_act/tc_tunnel_key.h> +#include <net/tc_act/tc_csum.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_skbedit.h> extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -61,7 +68,8 @@ static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind) } static const struct tcf_proto_ops * -tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack) +tcf_proto_lookup_ops(const char *kind, bool rtnl_held, + struct netlink_ext_ack *extack) { const struct tcf_proto_ops *ops; @@ -69,9 +77,11 @@ tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack) if (ops) return ops; #ifdef CONFIG_MODULES - rtnl_unlock(); + if (rtnl_held) + rtnl_unlock(); request_module("cls_%s", kind); - rtnl_lock(); + if (rtnl_held) + rtnl_lock(); ops = __tcf_proto_lookup_ops(kind); /* We dropped the RTNL semaphore in order to perform * the module load. So, even if we succeeded in loading @@ -152,8 +162,26 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) return TC_H_MAJ(first); } +static bool tcf_proto_is_unlocked(const char *kind) +{ + const struct tcf_proto_ops *ops; + bool ret; + + ops = tcf_proto_lookup_ops(kind, false, NULL); + /* On error return false to take rtnl lock. Proto lookup/create + * functions will perform lookup again and properly handle errors. + */ + if (IS_ERR(ops)) + return false; + + ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED); + module_put(ops->owner); + return ret; +} + static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, u32 prio, struct tcf_chain *chain, + bool rtnl_held, struct netlink_ext_ack *extack) { struct tcf_proto *tp; @@ -163,7 +191,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, if (!tp) return ERR_PTR(-ENOBUFS); - tp->ops = tcf_proto_lookup_ops(kind, extack); + tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack); if (IS_ERR(tp->ops)) { err = PTR_ERR(tp->ops); goto errout; @@ -172,6 +200,8 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, tp->protocol = protocol; tp->prio = prio; tp->chain = chain; + spin_lock_init(&tp->lock); + refcount_set(&tp->refcnt, 1); err = tp->ops->init(tp); if (err) { @@ -185,14 +215,75 @@ errout: return ERR_PTR(err); } -static void tcf_proto_destroy(struct tcf_proto *tp, +static void tcf_proto_get(struct tcf_proto *tp) +{ + refcount_inc(&tp->refcnt); +} + +static void tcf_chain_put(struct tcf_chain *chain); + +static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { - tp->ops->destroy(tp, extack); + tp->ops->destroy(tp, rtnl_held, extack); + tcf_chain_put(tp->chain); module_put(tp->ops->owner); kfree_rcu(tp, rcu); } +static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) +{ + if (refcount_dec_and_test(&tp->refcnt)) + tcf_proto_destroy(tp, rtnl_held, extack); +} + +static int walker_noop(struct tcf_proto *tp, void *d, struct tcf_walker *arg) +{ + return -1; +} + +static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held) +{ + struct tcf_walker walker = { .fn = walker_noop, }; + + if (tp->ops->walk) { + tp->ops->walk(tp, &walker, rtnl_held); + return !walker.stop; + } + return true; +} + +static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held) +{ + spin_lock(&tp->lock); + if (tcf_proto_is_empty(tp, rtnl_held)) + tp->deleting = true; + spin_unlock(&tp->lock); + return tp->deleting; +} + +static void tcf_proto_mark_delete(struct tcf_proto *tp) +{ + spin_lock(&tp->lock); + tp->deleting = true; + spin_unlock(&tp->lock); +} + +static bool tcf_proto_is_deleting(struct tcf_proto *tp) +{ + bool deleting; + + spin_lock(&tp->lock); + deleting = tp->deleting; + spin_unlock(&tp->lock); + + return deleting; +} + +#define ASSERT_BLOCK_LOCKED(block) \ + lockdep_assert_held(&(block)->lock) + struct tcf_filter_chain_list_item { struct list_head list; tcf_chain_head_change_t *chain_head_change; @@ -204,10 +295,13 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, { struct tcf_chain *chain; + ASSERT_BLOCK_LOCKED(block); + chain = kzalloc(sizeof(*chain), GFP_KERNEL); if (!chain) return NULL; list_add_tail(&chain->list, &block->chain_list); + mutex_init(&chain->filter_chain_lock); chain->block = block; chain->index = chain_index; chain->refcnt = 1; @@ -231,29 +325,59 @@ static void tcf_chain0_head_change(struct tcf_chain *chain, if (chain->index) return; + + mutex_lock(&block->lock); list_for_each_entry(item, &block->chain0.filter_chain_list, list) tcf_chain_head_change_item(item, tp_head); + mutex_unlock(&block->lock); } -static void tcf_chain_destroy(struct tcf_chain *chain) +/* Returns true if block can be safely freed. */ + +static bool tcf_chain_detach(struct tcf_chain *chain) { struct tcf_block *block = chain->block; + ASSERT_BLOCK_LOCKED(block); + list_del(&chain->list); if (!chain->index) block->chain0.chain = NULL; + + if (list_empty(&block->chain_list) && + refcount_read(&block->refcnt) == 0) + return true; + + return false; +} + +static void tcf_block_destroy(struct tcf_block *block) +{ + mutex_destroy(&block->lock); + kfree_rcu(block, rcu); +} + +static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block) +{ + struct tcf_block *block = chain->block; + + mutex_destroy(&chain->filter_chain_lock); kfree(chain); - if (list_empty(&block->chain_list) && !refcount_read(&block->refcnt)) - kfree_rcu(block, rcu); + if (free_block) + tcf_block_destroy(block); } static void tcf_chain_hold(struct tcf_chain *chain) { + ASSERT_BLOCK_LOCKED(chain->block); + ++chain->refcnt; } static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain) { + ASSERT_BLOCK_LOCKED(chain->block); + /* In case all the references are action references, this * chain should not be shown to the user. */ @@ -265,6 +389,8 @@ static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block, { struct tcf_chain *chain; + ASSERT_BLOCK_LOCKED(block); + list_for_each_entry(chain, &block->chain_list, list) { if (chain->index == chain_index) return chain; @@ -279,31 +405,40 @@ static struct tcf_chain *__tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create, bool by_act) { - struct tcf_chain *chain = tcf_chain_lookup(block, chain_index); + struct tcf_chain *chain = NULL; + bool is_first_reference; + mutex_lock(&block->lock); + chain = tcf_chain_lookup(block, chain_index); if (chain) { tcf_chain_hold(chain); } else { if (!create) - return NULL; + goto errout; chain = tcf_chain_create(block, chain_index); if (!chain) - return NULL; + goto errout; } if (by_act) ++chain->action_refcnt; + is_first_reference = chain->refcnt - chain->action_refcnt == 1; + mutex_unlock(&block->lock); /* Send notification only in case we got the first * non-action reference. Until then, the chain acts only as * a placeholder for actions pointing to it and user ought * not know about them. */ - if (chain->refcnt - chain->action_refcnt == 1 && !by_act) + if (is_first_reference && !by_act) tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, RTM_NEWCHAIN, false); return chain; + +errout: + mutex_unlock(&block->lock); + return chain; } static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, @@ -318,51 +453,94 @@ struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index) } EXPORT_SYMBOL(tcf_chain_get_by_act); -static void tc_chain_tmplt_del(struct tcf_chain *chain); +static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv); +static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv, u32 chain_index, + struct tcf_block *block, struct sk_buff *oskb, + u32 seq, u16 flags, bool unicast); -static void __tcf_chain_put(struct tcf_chain *chain, bool by_act) +static void __tcf_chain_put(struct tcf_chain *chain, bool by_act, + bool explicitly_created) { + struct tcf_block *block = chain->block; + const struct tcf_proto_ops *tmplt_ops; + bool is_last, free_block = false; + unsigned int refcnt; + void *tmplt_priv; + u32 chain_index; + + mutex_lock(&block->lock); + if (explicitly_created) { + if (!chain->explicitly_created) { + mutex_unlock(&block->lock); + return; + } + chain->explicitly_created = false; + } + if (by_act) chain->action_refcnt--; - chain->refcnt--; + + /* tc_chain_notify_delete can't be called while holding block lock. + * However, when block is unlocked chain can be changed concurrently, so + * save these to temporary variables. + */ + refcnt = --chain->refcnt; + is_last = refcnt - chain->action_refcnt == 0; + tmplt_ops = chain->tmplt_ops; + tmplt_priv = chain->tmplt_priv; + chain_index = chain->index; + + if (refcnt == 0) + free_block = tcf_chain_detach(chain); + mutex_unlock(&block->lock); /* The last dropped non-action reference will trigger notification. */ - if (chain->refcnt - chain->action_refcnt == 0 && !by_act) - tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false); + if (is_last && !by_act) { + tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain_index, + block, NULL, 0, 0, false); + /* Last reference to chain, no need to lock. */ + chain->flushing = false; + } - if (chain->refcnt == 0) { - tc_chain_tmplt_del(chain); - tcf_chain_destroy(chain); + if (refcnt == 0) { + tc_chain_tmplt_del(tmplt_ops, tmplt_priv); + tcf_chain_destroy(chain, free_block); } } static void tcf_chain_put(struct tcf_chain *chain) { - __tcf_chain_put(chain, false); + __tcf_chain_put(chain, false, false); } void tcf_chain_put_by_act(struct tcf_chain *chain) { - __tcf_chain_put(chain, true); + __tcf_chain_put(chain, true, false); } EXPORT_SYMBOL(tcf_chain_put_by_act); static void tcf_chain_put_explicitly_created(struct tcf_chain *chain) { - if (chain->explicitly_created) - tcf_chain_put(chain); + __tcf_chain_put(chain, false, true); } -static void tcf_chain_flush(struct tcf_chain *chain) +static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held) { - struct tcf_proto *tp = rtnl_dereference(chain->filter_chain); + struct tcf_proto *tp, *tp_next; + mutex_lock(&chain->filter_chain_lock); + tp = tcf_chain_dereference(chain->filter_chain, chain); + RCU_INIT_POINTER(chain->filter_chain, NULL); tcf_chain0_head_change(chain, NULL); + chain->flushing = true; + mutex_unlock(&chain->filter_chain_lock); + while (tp) { - RCU_INIT_POINTER(chain->filter_chain, tp->next); - tcf_proto_destroy(tp, NULL); - tp = rtnl_dereference(chain->filter_chain); - tcf_chain_put(chain); + tp_next = rcu_dereference_protected(tp->next, 1); + tcf_proto_put(tp, rtnl_held, NULL); + tp = tp_next; } } @@ -684,8 +862,8 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block, struct tcf_block_ext_info *ei, struct netlink_ext_ack *extack) { - struct tcf_chain *chain0 = block->chain0.chain; struct tcf_filter_chain_list_item *item; + struct tcf_chain *chain0; item = kmalloc(sizeof(*item), GFP_KERNEL); if (!item) { @@ -694,9 +872,32 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block, } item->chain_head_change = ei->chain_head_change; item->chain_head_change_priv = ei->chain_head_change_priv; - if (chain0 && chain0->filter_chain) - tcf_chain_head_change_item(item, chain0->filter_chain); - list_add(&item->list, &block->chain0.filter_chain_list); + + mutex_lock(&block->lock); + chain0 = block->chain0.chain; + if (chain0) + tcf_chain_hold(chain0); + else + list_add(&item->list, &block->chain0.filter_chain_list); + mutex_unlock(&block->lock); + + if (chain0) { + struct tcf_proto *tp_head; + + mutex_lock(&chain0->filter_chain_lock); + + tp_head = tcf_chain_dereference(chain0->filter_chain, chain0); + if (tp_head) + tcf_chain_head_change_item(item, tp_head); + + mutex_lock(&block->lock); + list_add(&item->list, &block->chain0.filter_chain_list); + mutex_unlock(&block->lock); + + mutex_unlock(&chain0->filter_chain_lock); + tcf_chain_put(chain0); + } + return 0; } @@ -704,20 +905,23 @@ static void tcf_chain0_head_change_cb_del(struct tcf_block *block, struct tcf_block_ext_info *ei) { - struct tcf_chain *chain0 = block->chain0.chain; struct tcf_filter_chain_list_item *item; + mutex_lock(&block->lock); list_for_each_entry(item, &block->chain0.filter_chain_list, list) { if ((!ei->chain_head_change && !ei->chain_head_change_priv) || (item->chain_head_change == ei->chain_head_change && item->chain_head_change_priv == ei->chain_head_change_priv)) { - if (chain0) + if (block->chain0.chain) tcf_chain_head_change_item(item, NULL); list_del(&item->list); + mutex_unlock(&block->lock); + kfree(item); return; } } + mutex_unlock(&block->lock); WARN_ON(1); } @@ -764,6 +968,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, NL_SET_ERR_MSG(extack, "Memory allocation for block failed"); return ERR_PTR(-ENOMEM); } + mutex_init(&block->lock); INIT_LIST_HEAD(&block->chain_list); INIT_LIST_HEAD(&block->cb_list); INIT_LIST_HEAD(&block->owner_list); @@ -799,157 +1004,241 @@ static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index) return block; } -static void tcf_block_flush_all_chains(struct tcf_block *block) +static struct tcf_chain * +__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain) { - struct tcf_chain *chain; + mutex_lock(&block->lock); + if (chain) + chain = list_is_last(&chain->list, &block->chain_list) ? + NULL : list_next_entry(chain, list); + else + chain = list_first_entry_or_null(&block->chain_list, + struct tcf_chain, list); - /* Hold a refcnt for all chains, so that they don't disappear - * while we are iterating. - */ - list_for_each_entry(chain, &block->chain_list, list) + /* skip all action-only chains */ + while (chain && tcf_chain_held_by_acts_only(chain)) + chain = list_is_last(&chain->list, &block->chain_list) ? + NULL : list_next_entry(chain, list); + + if (chain) tcf_chain_hold(chain); + mutex_unlock(&block->lock); - list_for_each_entry(chain, &block->chain_list, list) - tcf_chain_flush(chain); + return chain; } -static void tcf_block_put_all_chains(struct tcf_block *block) +/* Function to be used by all clients that want to iterate over all chains on + * block. It properly obtains block->lock and takes reference to chain before + * returning it. Users of this function must be tolerant to concurrent chain + * insertion/deletion or ensure that no concurrent chain modification is + * possible. Note that all netlink dump callbacks cannot guarantee to provide + * consistent dump because rtnl lock is released each time skb is filled with + * data and sent to user-space. + */ + +struct tcf_chain * +tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain) { - struct tcf_chain *chain, *tmp; + struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain); - /* At this point, all the chains should have refcnt >= 1. */ - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) { - tcf_chain_put_explicitly_created(chain); + if (chain) tcf_chain_put(chain); - } + + return chain_next; } +EXPORT_SYMBOL(tcf_get_next_chain); -static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, - struct tcf_block_ext_info *ei) +static struct tcf_proto * +__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp) { - if (refcount_dec_and_test(&block->refcnt)) { - /* Flushing/putting all chains will cause the block to be - * deallocated when last chain is freed. However, if chain_list - * is empty, block has to be manually deallocated. After block - * reference counter reached 0, it is no longer possible to - * increment it or add new chains to block. - */ - bool free_block = list_empty(&block->chain_list); + u32 prio = 0; - if (tcf_block_shared(block)) - tcf_block_remove(block, block->net); - if (!free_block) - tcf_block_flush_all_chains(block); + ASSERT_RTNL(); + mutex_lock(&chain->filter_chain_lock); - if (q) - tcf_block_offload_unbind(block, q, ei); + if (!tp) { + tp = tcf_chain_dereference(chain->filter_chain, chain); + } else if (tcf_proto_is_deleting(tp)) { + /* 'deleting' flag is set and chain->filter_chain_lock was + * unlocked, which means next pointer could be invalid. Restart + * search. + */ + prio = tp->prio + 1; + tp = tcf_chain_dereference(chain->filter_chain, chain); - if (free_block) - kfree_rcu(block, rcu); - else - tcf_block_put_all_chains(block); - } else if (q) { - tcf_block_offload_unbind(block, q, ei); + for (; tp; tp = tcf_chain_dereference(tp->next, chain)) + if (!tp->deleting && tp->prio >= prio) + break; + } else { + tp = tcf_chain_dereference(tp->next, chain); } + + if (tp) + tcf_proto_get(tp); + + mutex_unlock(&chain->filter_chain_lock); + + return tp; } -static void tcf_block_refcnt_put(struct tcf_block *block) +/* Function to be used by all clients that want to iterate over all tp's on + * chain. Users of this function must be tolerant to concurrent tp + * insertion/deletion or ensure that no concurrent chain modification is + * possible. Note that all netlink dump callbacks cannot guarantee to provide + * consistent dump because rtnl lock is released each time skb is filled with + * data and sent to user-space. + */ + +struct tcf_proto * +tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp, + bool rtnl_held) { - __tcf_block_put(block, NULL, NULL); + struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp); + + if (tp) + tcf_proto_put(tp, rtnl_held, NULL); + + return tp_next; } +EXPORT_SYMBOL(tcf_get_next_proto); -/* Find tcf block. - * Set q, parent, cl when appropriate. +static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held) +{ + struct tcf_chain *chain; + + /* Last reference to block. At this point chains cannot be added or + * removed concurrently. + */ + for (chain = tcf_get_next_chain(block, NULL); + chain; + chain = tcf_get_next_chain(block, chain)) { + tcf_chain_put_explicitly_created(chain); + tcf_chain_flush(chain, rtnl_held); + } +} + +/* Lookup Qdisc and increments its reference counter. + * Set parent, if necessary. */ -static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, - u32 *parent, unsigned long *cl, - int ifindex, u32 block_index, - struct netlink_ext_ack *extack) +static int __tcf_qdisc_find(struct net *net, struct Qdisc **q, + u32 *parent, int ifindex, bool rtnl_held, + struct netlink_ext_ack *extack) { - struct tcf_block *block; + const struct Qdisc_class_ops *cops; + struct net_device *dev; int err = 0; - if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { - block = tcf_block_refcnt_get(net, block_index); - if (!block) { - NL_SET_ERR_MSG(extack, "Block of given index was not found"); - return ERR_PTR(-EINVAL); - } - } else { - const struct Qdisc_class_ops *cops; - struct net_device *dev; - - rcu_read_lock(); + if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) + return 0; - /* Find link */ - dev = dev_get_by_index_rcu(net, ifindex); - if (!dev) { - rcu_read_unlock(); - return ERR_PTR(-ENODEV); - } + rcu_read_lock(); - /* Find qdisc */ - if (!*parent) { - *q = dev->qdisc; - *parent = (*q)->handle; - } else { - *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); - if (!*q) { - NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); - err = -EINVAL; - goto errout_rcu; - } - } + /* Find link */ + dev = dev_get_by_index_rcu(net, ifindex); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } - *q = qdisc_refcount_inc_nz(*q); + /* Find qdisc */ + if (!*parent) { + *q = dev->qdisc; + *parent = (*q)->handle; + } else { + *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); if (!*q) { NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); err = -EINVAL; goto errout_rcu; } + } - /* Is it classful? */ - cops = (*q)->ops->cl_ops; - if (!cops) { - NL_SET_ERR_MSG(extack, "Qdisc not classful"); - err = -EINVAL; - goto errout_rcu; - } + *q = qdisc_refcount_inc_nz(*q); + if (!*q) { + NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); + err = -EINVAL; + goto errout_rcu; + } - if (!cops->tcf_block) { - NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); - err = -EOPNOTSUPP; - goto errout_rcu; - } + /* Is it classful? */ + cops = (*q)->ops->cl_ops; + if (!cops) { + NL_SET_ERR_MSG(extack, "Qdisc not classful"); + err = -EINVAL; + goto errout_qdisc; + } - /* At this point we know that qdisc is not noop_qdisc, - * which means that qdisc holds a reference to net_device - * and we hold a reference to qdisc, so it is safe to release - * rcu read lock. - */ - rcu_read_unlock(); + if (!cops->tcf_block) { + NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); + err = -EOPNOTSUPP; + goto errout_qdisc; + } - /* Do we search for filter, attached to class? */ - if (TC_H_MIN(*parent)) { - *cl = cops->find(*q, *parent); - if (*cl == 0) { - NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); - err = -ENOENT; - goto errout_qdisc; - } +errout_rcu: + /* At this point we know that qdisc is not noop_qdisc, + * which means that qdisc holds a reference to net_device + * and we hold a reference to qdisc, so it is safe to release + * rcu read lock. + */ + rcu_read_unlock(); + return err; + +errout_qdisc: + rcu_read_unlock(); + + if (rtnl_held) + qdisc_put(*q); + else + qdisc_put_unlocked(*q); + *q = NULL; + + return err; +} + +static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl, + int ifindex, struct netlink_ext_ack *extack) +{ + if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) + return 0; + + /* Do we search for filter, attached to class? */ + if (TC_H_MIN(parent)) { + const struct Qdisc_class_ops *cops = q->ops->cl_ops; + + *cl = cops->find(q, parent); + if (*cl == 0) { + NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); + return -ENOENT; } + } - /* And the last stroke */ - block = cops->tcf_block(*q, *cl, extack); + return 0; +} + +static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q, + unsigned long cl, int ifindex, + u32 block_index, + struct netlink_ext_ack *extack) +{ + struct tcf_block *block; + + if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { + block = tcf_block_refcnt_get(net, block_index); if (!block) { - err = -EINVAL; - goto errout_qdisc; + NL_SET_ERR_MSG(extack, "Block of given index was not found"); + return ERR_PTR(-EINVAL); } + } else { + const struct Qdisc_class_ops *cops = q->ops->cl_ops; + + block = cops->tcf_block(q, cl, extack); + if (!block) + return ERR_PTR(-EINVAL); + if (tcf_block_shared(block)) { NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters"); - err = -EOPNOTSUPP; - goto errout_qdisc; + return ERR_PTR(-EOPNOTSUPP); } /* Always take reference to block in order to support execution @@ -962,24 +1251,89 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, } return block; +} + +static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei, bool rtnl_held) +{ + if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) { + /* Flushing/putting all chains will cause the block to be + * deallocated when last chain is freed. However, if chain_list + * is empty, block has to be manually deallocated. After block + * reference counter reached 0, it is no longer possible to + * increment it or add new chains to block. + */ + bool free_block = list_empty(&block->chain_list); + + mutex_unlock(&block->lock); + if (tcf_block_shared(block)) + tcf_block_remove(block, block->net); + + if (q) + tcf_block_offload_unbind(block, q, ei); + + if (free_block) + tcf_block_destroy(block); + else + tcf_block_flush_all_chains(block, rtnl_held); + } else if (q) { + tcf_block_offload_unbind(block, q, ei); + } +} + +static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held) +{ + __tcf_block_put(block, NULL, NULL, rtnl_held); +} + +/* Find tcf block. + * Set q, parent, cl when appropriate. + */ + +static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, + u32 *parent, unsigned long *cl, + int ifindex, u32 block_index, + struct netlink_ext_ack *extack) +{ + struct tcf_block *block; + int err = 0; + + ASSERT_RTNL(); + + err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack); + if (err) + goto errout; + + err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack); + if (err) + goto errout_qdisc; + + block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack); + if (IS_ERR(block)) + goto errout_qdisc; + + return block; -errout_rcu: - rcu_read_unlock(); errout_qdisc: - if (*q) { + if (*q) qdisc_put(*q); - *q = NULL; - } +errout: + *q = NULL; return ERR_PTR(err); } -static void tcf_block_release(struct Qdisc *q, struct tcf_block *block) +static void tcf_block_release(struct Qdisc *q, struct tcf_block *block, + bool rtnl_held) { if (!IS_ERR_OR_NULL(block)) - tcf_block_refcnt_put(block); + tcf_block_refcnt_put(block, rtnl_held); - if (q) - qdisc_put(q); + if (q) { + if (rtnl_held) + qdisc_put(q); + else + qdisc_put_unlocked(q); + } } struct tcf_block_owner_item { @@ -1087,7 +1441,7 @@ err_chain0_head_change_cb_add: tcf_block_owner_del(block, q, ei->binder_type); err_block_owner_add: err_block_insert: - tcf_block_refcnt_put(block); + tcf_block_refcnt_put(block, true); return err; } EXPORT_SYMBOL(tcf_block_get_ext); @@ -1124,7 +1478,7 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, tcf_chain0_head_change_cb_del(block, ei); tcf_block_owner_del(block, q, ei->binder_type); - __tcf_block_put(block, q, ei); + __tcf_block_put(block, q, ei, true); } EXPORT_SYMBOL(tcf_block_put_ext); @@ -1181,13 +1535,19 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb, void *cb_priv, bool add, bool offload_in_use, struct netlink_ext_ack *extack) { - struct tcf_chain *chain; - struct tcf_proto *tp; + struct tcf_chain *chain, *chain_prev; + struct tcf_proto *tp, *tp_prev; int err; - list_for_each_entry(chain, &block->chain_list, list) { - for (tp = rtnl_dereference(chain->filter_chain); tp; - tp = rtnl_dereference(tp->next)) { + for (chain = __tcf_get_next_chain(block, NULL); + chain; + chain_prev = chain, + chain = __tcf_get_next_chain(block, chain), + tcf_chain_put(chain_prev)) { + for (tp = __tcf_get_next_proto(chain, NULL); tp; + tp_prev = tp, + tp = __tcf_get_next_proto(chain, tp), + tcf_proto_put(tp_prev, true, NULL)) { if (tp->ops->reoffload) { err = tp->ops->reoffload(tp, add, cb, cb_priv, extack); @@ -1204,6 +1564,8 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb, return 0; err_playback_remove: + tcf_proto_put(tp, true, NULL); + tcf_chain_put(chain); tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use, extack); return err; @@ -1329,32 +1691,116 @@ struct tcf_chain_info { struct tcf_proto __rcu *next; }; -static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info) +static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain, + struct tcf_chain_info *chain_info) { - return rtnl_dereference(*chain_info->pprev); + return tcf_chain_dereference(*chain_info->pprev, chain); } -static void tcf_chain_tp_insert(struct tcf_chain *chain, - struct tcf_chain_info *chain_info, - struct tcf_proto *tp) +static int tcf_chain_tp_insert(struct tcf_chain *chain, + struct tcf_chain_info *chain_info, + struct tcf_proto *tp) { + if (chain->flushing) + return -EAGAIN; + if (*chain_info->pprev == chain->filter_chain) tcf_chain0_head_change(chain, tp); - RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info)); + tcf_proto_get(tp); + RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); - tcf_chain_hold(chain); + + return 0; } static void tcf_chain_tp_remove(struct tcf_chain *chain, struct tcf_chain_info *chain_info, struct tcf_proto *tp) { - struct tcf_proto *next = rtnl_dereference(chain_info->next); + struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain); + tcf_proto_mark_delete(tp); if (tp == chain->filter_chain) tcf_chain0_head_change(chain, next); RCU_INIT_POINTER(*chain_info->pprev, next); - tcf_chain_put(chain); +} + +static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, + struct tcf_chain_info *chain_info, + u32 protocol, u32 prio, + bool prio_allocate); + +/* Try to insert new proto. + * If proto with specified priority already exists, free new proto + * and return existing one. + */ + +static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, + struct tcf_proto *tp_new, + u32 protocol, u32 prio, + bool rtnl_held) +{ + struct tcf_chain_info chain_info; + struct tcf_proto *tp; + int err = 0; + + mutex_lock(&chain->filter_chain_lock); + + tp = tcf_chain_tp_find(chain, &chain_info, + protocol, prio, false); + if (!tp) + err = tcf_chain_tp_insert(chain, &chain_info, tp_new); + mutex_unlock(&chain->filter_chain_lock); + + if (tp) { + tcf_proto_destroy(tp_new, rtnl_held, NULL); + tp_new = tp; + } else if (err) { + tcf_proto_destroy(tp_new, rtnl_held, NULL); + tp_new = ERR_PTR(err); + } + + return tp_new; +} + +static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, + struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) +{ + struct tcf_chain_info chain_info; + struct tcf_proto *tp_iter; + struct tcf_proto **pprev; + struct tcf_proto *next; + + mutex_lock(&chain->filter_chain_lock); + + /* Atomically find and remove tp from chain. */ + for (pprev = &chain->filter_chain; + (tp_iter = tcf_chain_dereference(*pprev, chain)); + pprev = &tp_iter->next) { + if (tp_iter == tp) { + chain_info.pprev = pprev; + chain_info.next = tp_iter->next; + WARN_ON(tp_iter->deleting); + break; + } + } + /* Verify that tp still exists and no new filters were inserted + * concurrently. + * Mark tp for deletion if it is empty. + */ + if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) { + mutex_unlock(&chain->filter_chain_lock); + return; + } + + next = tcf_chain_dereference(chain_info.next, chain); + if (tp == chain->filter_chain) + tcf_chain0_head_change(chain, next); + RCU_INIT_POINTER(*chain_info.pprev, next); + mutex_unlock(&chain->filter_chain_lock); + + tcf_proto_put(tp, rtnl_held, extack); } static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, @@ -1367,7 +1813,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, /* Check the chain for existence of proto-tcf with this priority */ for (pprev = &chain->filter_chain; - (tp = rtnl_dereference(*pprev)); pprev = &tp->next) { + (tp = tcf_chain_dereference(*pprev, chain)); + pprev = &tp->next) { if (tp->prio >= prio) { if (tp->prio == prio) { if (prio_allocate || @@ -1380,14 +1827,20 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, } } chain_info->pprev = pprev; - chain_info->next = tp ? tp->next : NULL; + if (tp) { + chain_info->next = tp->next; + tcf_proto_get(tp); + } else { + chain_info->next = NULL; + } return tp; } static int tcf_fill_node(struct net *net, struct sk_buff *skb, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, u32 parent, void *fh, - u32 portid, u32 seq, u16 flags, int event) + u32 portid, u32 seq, u16 flags, int event, + bool rtnl_held) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -1415,7 +1868,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb, if (!fh) { tcm->tcm_handle = 0; } else { - if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0) + if (tp->ops->dump && + tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0) goto nla_put_failure; } nlh->nlmsg_len = skb_tail_pointer(skb) - b; @@ -1430,7 +1884,8 @@ nla_put_failure: static int tfilter_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, - u32 parent, void *fh, int event, bool unicast) + u32 parent, void *fh, int event, bool unicast, + bool rtnl_held) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; @@ -1440,7 +1895,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, return -ENOBUFS; if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, - n->nlmsg_seq, n->nlmsg_flags, event) <= 0) { + n->nlmsg_seq, n->nlmsg_flags, event, + rtnl_held) <= 0) { kfree_skb(skb); return -EINVAL; } @@ -1456,7 +1912,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct tcf_proto *tp, struct tcf_block *block, struct Qdisc *q, u32 parent, void *fh, bool unicast, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; @@ -1467,13 +1923,14 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, return -ENOBUFS; if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid, - n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) { + n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER, + rtnl_held) <= 0) { NL_SET_ERR_MSG(extack, "Failed to build del event notification"); kfree_skb(skb); return -EINVAL; } - err = tp->ops->delete(tp, fh, last, extack); + err = tp->ops->delete(tp, fh, last, rtnl_held, extack); if (err) { kfree_skb(skb); return err; @@ -1492,14 +1949,21 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, struct tcf_block *block, struct Qdisc *q, u32 parent, struct nlmsghdr *n, - struct tcf_chain *chain, int event) + struct tcf_chain *chain, int event, + bool rtnl_held) { struct tcf_proto *tp; - for (tp = rtnl_dereference(chain->filter_chain); - tp; tp = rtnl_dereference(tp->next)) + for (tp = tcf_get_next_proto(chain, NULL, rtnl_held); + tp; tp = tcf_get_next_proto(chain, tp, rtnl_held)) tfilter_notify(net, oskb, n, tp, block, - q, parent, NULL, event, false); + q, parent, NULL, event, false, rtnl_held); +} + +static void tfilter_put(struct tcf_proto *tp, void *fh) +{ + if (tp->ops->put && fh) + tp->ops->put(tp, fh); } static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, @@ -1522,6 +1986,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *fh; int err; int tp_created; + bool rtnl_held = false; if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -1538,7 +2003,9 @@ replay: prio = TC_H_MAJ(t->tcm_info); prio_allocate = false; parent = t->tcm_parent; + tp = NULL; cl = 0; + block = NULL; if (prio == 0) { /* If no priority is provided by the user, @@ -1555,8 +2022,27 @@ replay: /* Find head of filter chain. */ - block = tcf_block_find(net, &q, &parent, &cl, - t->tcm_ifindex, t->tcm_block_index, extack); + err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); + if (err) + return err; + + /* Take rtnl mutex if rtnl_held was set to true on previous iteration, + * block is shared (no qdisc found), qdisc is not unlocked, classifier + * type is not specified, classifier is not unlocked. + */ + if (rtnl_held || + (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || + !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + rtnl_held = true; + rtnl_lock(); + } + + err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); + if (err) + goto errout; + + block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, + extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; @@ -1575,40 +2061,62 @@ replay: goto errout; } + mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, prio_allocate); if (IS_ERR(tp)) { NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); err = PTR_ERR(tp); - goto errout; + goto errout_locked; } if (tp == NULL) { + struct tcf_proto *tp_new = NULL; + + if (chain->flushing) { + err = -EAGAIN; + goto errout_locked; + } + /* Proto-tcf does not exist, create new one */ if (tca[TCA_KIND] == NULL || !protocol) { NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified"); err = -EINVAL; - goto errout; + goto errout_locked; } if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter"); err = -ENOENT; - goto errout; + goto errout_locked; } if (prio_allocate) - prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info)); + prio = tcf_auto_prio(tcf_chain_tp_prev(chain, + &chain_info)); - tp = tcf_proto_create(nla_data(tca[TCA_KIND]), - protocol, prio, chain, extack); + mutex_unlock(&chain->filter_chain_lock); + tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]), + protocol, prio, chain, rtnl_held, + extack); + if (IS_ERR(tp_new)) { + err = PTR_ERR(tp_new); + goto errout_tp; + } + + tp_created = 1; + tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio, + rtnl_held); if (IS_ERR(tp)) { err = PTR_ERR(tp); - goto errout; + goto errout_tp; } - tp_created = 1; - } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { + } else { + mutex_unlock(&chain->filter_chain_lock); + } + + if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); err = -EINVAL; goto errout; @@ -1623,6 +2131,7 @@ replay: goto errout; } } else if (n->nlmsg_flags & NLM_F_EXCL) { + tfilter_put(tp, fh); NL_SET_ERR_MSG(extack, "Filter already exists"); err = -EEXIST; goto errout; @@ -1636,25 +2145,41 @@ replay: err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE, - extack); + rtnl_held, extack); if (err == 0) { - if (tp_created) - tcf_chain_tp_insert(chain, &chain_info, tp); tfilter_notify(net, skb, n, tp, block, q, parent, fh, - RTM_NEWTFILTER, false); - } else { - if (tp_created) - tcf_proto_destroy(tp, NULL); + RTM_NEWTFILTER, false, rtnl_held); + tfilter_put(tp, fh); } errout: - if (chain) - tcf_chain_put(chain); - tcf_block_release(q, block); - if (err == -EAGAIN) + if (err && tp_created) + tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL); +errout_tp: + if (chain) { + if (tp && !IS_ERR(tp)) + tcf_proto_put(tp, rtnl_held, NULL); + if (!tp_created) + tcf_chain_put(chain); + } + tcf_block_release(q, block, rtnl_held); + + if (rtnl_held) + rtnl_unlock(); + + if (err == -EAGAIN) { + /* Take rtnl lock in case EAGAIN is caused by concurrent flush + * of target chain. + */ + rtnl_held = true; /* Replay the request. */ goto replay; + } return err; + +errout_locked: + mutex_unlock(&chain->filter_chain_lock); + goto errout; } static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, @@ -1670,11 +2195,12 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct Qdisc *q = NULL; struct tcf_chain_info chain_info; struct tcf_chain *chain = NULL; - struct tcf_block *block; + struct tcf_block *block = NULL; struct tcf_proto *tp = NULL; unsigned long cl = 0; void *fh = NULL; int err; + bool rtnl_held = false; if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -1695,8 +2221,27 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, /* Find head of filter chain. */ - block = tcf_block_find(net, &q, &parent, &cl, - t->tcm_ifindex, t->tcm_block_index, extack); + err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); + if (err) + return err; + + /* Take rtnl mutex if flushing whole chain, block is shared (no qdisc + * found), qdisc is not unlocked, classifier type is not specified, + * classifier is not unlocked. + */ + if (!prio || + (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || + !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + rtnl_held = true; + rtnl_lock(); + } + + err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); + if (err) + goto errout; + + block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, + extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; @@ -1724,56 +2269,69 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, if (prio == 0) { tfilter_notify_chain(net, skb, block, q, parent, n, - chain, RTM_DELTFILTER); - tcf_chain_flush(chain); + chain, RTM_DELTFILTER, rtnl_held); + tcf_chain_flush(chain, rtnl_held); err = 0; goto errout; } + mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false); if (!tp || IS_ERR(tp)) { NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); err = tp ? PTR_ERR(tp) : -ENOENT; - goto errout; + goto errout_locked; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) { NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one"); err = -EINVAL; + goto errout_locked; + } else if (t->tcm_handle == 0) { + tcf_chain_tp_remove(chain, &chain_info, tp); + mutex_unlock(&chain->filter_chain_lock); + + tcf_proto_put(tp, rtnl_held, NULL); + tfilter_notify(net, skb, n, tp, block, q, parent, fh, + RTM_DELTFILTER, false, rtnl_held); + err = 0; goto errout; } + mutex_unlock(&chain->filter_chain_lock); fh = tp->ops->get(tp, t->tcm_handle); if (!fh) { - if (t->tcm_handle == 0) { - tcf_chain_tp_remove(chain, &chain_info, tp); - tfilter_notify(net, skb, n, tp, block, q, parent, fh, - RTM_DELTFILTER, false); - tcf_proto_destroy(tp, extack); - err = 0; - } else { - NL_SET_ERR_MSG(extack, "Specified filter handle not found"); - err = -ENOENT; - } + NL_SET_ERR_MSG(extack, "Specified filter handle not found"); + err = -ENOENT; } else { bool last; err = tfilter_del_notify(net, skb, n, tp, block, q, parent, fh, false, &last, - extack); + rtnl_held, extack); + if (err) goto errout; - if (last) { - tcf_chain_tp_remove(chain, &chain_info, tp); - tcf_proto_destroy(tp, extack); - } + if (last) + tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack); } errout: - if (chain) + if (chain) { + if (tp && !IS_ERR(tp)) + tcf_proto_put(tp, rtnl_held, NULL); tcf_chain_put(chain); - tcf_block_release(q, block); + } + tcf_block_release(q, block, rtnl_held); + + if (rtnl_held) + rtnl_unlock(); + return err; + +errout_locked: + mutex_unlock(&chain->filter_chain_lock); + goto errout; } static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, @@ -1789,11 +2347,12 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct Qdisc *q = NULL; struct tcf_chain_info chain_info; struct tcf_chain *chain = NULL; - struct tcf_block *block; + struct tcf_block *block = NULL; struct tcf_proto *tp = NULL; unsigned long cl = 0; void *fh = NULL; int err; + bool rtnl_held = false; err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) @@ -1811,8 +2370,26 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, /* Find head of filter chain. */ - block = tcf_block_find(net, &q, &parent, &cl, - t->tcm_ifindex, t->tcm_block_index, extack); + err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack); + if (err) + return err; + + /* Take rtnl mutex if block is shared (no qdisc found), qdisc is not + * unlocked, classifier type is not specified, classifier is not + * unlocked. + */ + if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || + !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + rtnl_held = true; + rtnl_lock(); + } + + err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack); + if (err) + goto errout; + + block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index, + extack); if (IS_ERR(block)) { err = PTR_ERR(block); goto errout; @@ -1831,8 +2408,10 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, goto errout; } + mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false); + mutex_unlock(&chain->filter_chain_lock); if (!tp || IS_ERR(tp)) { NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found"); err = tp ? PTR_ERR(tp) : -ENOENT; @@ -1850,15 +2429,23 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, err = -ENOENT; } else { err = tfilter_notify(net, skb, n, tp, block, q, parent, - fh, RTM_NEWTFILTER, true); + fh, RTM_NEWTFILTER, true, rtnl_held); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send filter notify message"); } + tfilter_put(tp, fh); errout: - if (chain) + if (chain) { + if (tp && !IS_ERR(tp)) + tcf_proto_put(tp, rtnl_held, NULL); tcf_chain_put(chain); - tcf_block_release(q, block); + } + tcf_block_release(q, block, rtnl_held); + + if (rtnl_held) + rtnl_unlock(); + return err; } @@ -1879,7 +2466,7 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg) return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent, n, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, - RTM_NEWTFILTER); + RTM_NEWTFILTER, true); } static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, @@ -1889,11 +2476,15 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, struct net *net = sock_net(skb->sk); struct tcf_block *block = chain->block; struct tcmsg *tcm = nlmsg_data(cb->nlh); + struct tcf_proto *tp, *tp_prev; struct tcf_dump_args arg; - struct tcf_proto *tp; - for (tp = rtnl_dereference(chain->filter_chain); - tp; tp = rtnl_dereference(tp->next), (*p_index)++) { + for (tp = __tcf_get_next_proto(chain, NULL); + tp; + tp_prev = tp, + tp = __tcf_get_next_proto(chain, tp), + tcf_proto_put(tp_prev, true, NULL), + (*p_index)++) { if (*p_index < index_start) continue; if (TC_H_MAJ(tcm->tcm_info) && @@ -1909,9 +2500,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, if (tcf_fill_node(net, skb, tp, block, q, parent, NULL, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - RTM_NEWTFILTER) <= 0) - return false; - + RTM_NEWTFILTER, true) <= 0) + goto errout; cb->args[1] = 1; } if (!tp->ops->walk) @@ -1926,23 +2516,27 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, arg.w.skip = cb->args[1] - 1; arg.w.count = 0; arg.w.cookie = cb->args[2]; - tp->ops->walk(tp, &arg.w); + tp->ops->walk(tp, &arg.w, true); cb->args[2] = arg.w.cookie; cb->args[1] = arg.w.count + 1; if (arg.w.stop) - return false; + goto errout; } return true; + +errout: + tcf_proto_put(tp, true, NULL); + return false; } /* called with RTNL */ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { + struct tcf_chain *chain, *chain_prev; struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; struct Qdisc *q = NULL; struct tcf_block *block; - struct tcf_chain *chain; struct tcmsg *tcm = nlmsg_data(cb->nlh); long index_start; long index; @@ -2006,19 +2600,24 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) index_start = cb->args[0]; index = 0; - list_for_each_entry(chain, &block->chain_list, list) { + for (chain = __tcf_get_next_chain(block, NULL); + chain; + chain_prev = chain, + chain = __tcf_get_next_chain(block, chain), + tcf_chain_put(chain_prev)) { if (tca[TCA_CHAIN] && nla_get_u32(tca[TCA_CHAIN]) != chain->index) continue; if (!tcf_chain_dump(chain, q, parent, skb, cb, index_start, &index)) { + tcf_chain_put(chain); err = -EMSGSIZE; break; } } if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) - tcf_block_refcnt_put(block); + tcf_block_refcnt_put(block, true); cb->args[0] = index; out: @@ -2028,8 +2627,10 @@ out: return skb->len; } -static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net, - struct sk_buff *skb, struct tcf_block *block, +static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv, u32 chain_index, + struct net *net, struct sk_buff *skb, + struct tcf_block *block, u32 portid, u32 seq, u16 flags, int event) { unsigned char *b = skb_tail_pointer(skb); @@ -2038,8 +2639,8 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net, struct tcmsg *tcm; void *priv; - ops = chain->tmplt_ops; - priv = chain->tmplt_priv; + ops = tmplt_ops; + priv = tmplt_priv; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) @@ -2057,7 +2658,7 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net, tcm->tcm_block_index = block->index; } - if (nla_put_u32(skb, TCA_CHAIN, chain->index)) + if (nla_put_u32(skb, TCA_CHAIN, chain_index)) goto nla_put_failure; if (ops) { @@ -2088,7 +2689,8 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (tc_chain_fill_node(chain, net, skb, block, portid, + if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv, + chain->index, net, skb, block, portid, seq, flags, event) <= 0) { kfree_skb(skb); return -EINVAL; @@ -2100,6 +2702,31 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); } +static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv, u32 chain_index, + struct tcf_block *block, struct sk_buff *oskb, + u32 seq, u16 flags, bool unicast) +{ + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + struct net *net = block->net; + struct sk_buff *skb; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb, + block, portid, seq, flags, RTM_DELCHAIN) <= 0) { + kfree_skb(skb); + return -EINVAL; + } + + if (unicast) + return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); +} + static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, struct nlattr **tca, struct netlink_ext_ack *extack) @@ -2111,7 +2738,7 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, if (!tca[TCA_KIND]) return 0; - ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack); + ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack); if (IS_ERR(ops)) return PTR_ERR(ops); if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) { @@ -2129,16 +2756,15 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, return 0; } -static void tc_chain_tmplt_del(struct tcf_chain *chain) +static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops, + void *tmplt_priv) { - const struct tcf_proto_ops *ops = chain->tmplt_ops; - /* If template ops are set, no work to do for us. */ - if (!ops) + if (!tmplt_ops) return; - ops->tmplt_destroy(chain->tmplt_priv); - module_put(ops->owner); + tmplt_ops->tmplt_destroy(tmplt_priv); + module_put(tmplt_ops->owner); } /* Add/delete/get a chain */ @@ -2181,6 +2807,8 @@ replay: err = -EINVAL; goto errout_block; } + + mutex_lock(&block->lock); chain = tcf_chain_lookup(block, chain_index); if (n->nlmsg_type == RTM_NEWCHAIN) { if (chain) { @@ -2192,54 +2820,61 @@ replay: } else { NL_SET_ERR_MSG(extack, "Filter chain already exists"); err = -EEXIST; - goto errout_block; + goto errout_block_locked; } } else { if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain"); err = -ENOENT; - goto errout_block; + goto errout_block_locked; } chain = tcf_chain_create(block, chain_index); if (!chain) { NL_SET_ERR_MSG(extack, "Failed to create filter chain"); err = -ENOMEM; - goto errout_block; + goto errout_block_locked; } } } else { if (!chain || tcf_chain_held_by_acts_only(chain)) { NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); err = -EINVAL; - goto errout_block; + goto errout_block_locked; } tcf_chain_hold(chain); } + if (n->nlmsg_type == RTM_NEWCHAIN) { + /* Modifying chain requires holding parent block lock. In case + * the chain was successfully added, take a reference to the + * chain. This ensures that an empty chain does not disappear at + * the end of this function. + */ + tcf_chain_hold(chain); + chain->explicitly_created = true; + } + mutex_unlock(&block->lock); + switch (n->nlmsg_type) { case RTM_NEWCHAIN: err = tc_chain_tmplt_add(chain, net, tca, extack); - if (err) + if (err) { + tcf_chain_put_explicitly_created(chain); goto errout; - /* In case the chain was successfully added, take a reference - * to the chain. This ensures that an empty chain - * does not disappear at the end of this function. - */ - tcf_chain_hold(chain); - chain->explicitly_created = true; + } + tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL, RTM_NEWCHAIN, false); break; case RTM_DELCHAIN: tfilter_notify_chain(net, skb, block, q, parent, n, - chain, RTM_DELTFILTER); + chain, RTM_DELTFILTER, true); /* Flush the chain first as the user requested chain removal. */ - tcf_chain_flush(chain); + tcf_chain_flush(chain, true); /* In case the chain was successfully deleted, put a reference * to the chain previously taken during addition. */ tcf_chain_put_explicitly_created(chain); - chain->explicitly_created = false; break; case RTM_GETCHAIN: err = tc_chain_notify(chain, skb, n->nlmsg_seq, @@ -2256,21 +2891,25 @@ replay: errout: tcf_chain_put(chain); errout_block: - tcf_block_release(q, block); + tcf_block_release(q, block, true); if (err == -EAGAIN) /* Replay the request. */ goto replay; return err; + +errout_block_locked: + mutex_unlock(&block->lock); + goto errout_block; } /* called with RTNL */ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) { + struct tcf_chain *chain, *chain_prev; struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; struct Qdisc *q = NULL; struct tcf_block *block; - struct tcf_chain *chain; struct tcmsg *tcm = nlmsg_data(cb->nlh); long index_start; long index; @@ -2334,7 +2973,11 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) index_start = cb->args[0]; index = 0; - list_for_each_entry(chain, &block->chain_list, list) { + for (chain = __tcf_get_next_chain(block, NULL); + chain; + chain_prev = chain, + chain = __tcf_get_next_chain(block, chain), + tcf_chain_put(chain_prev)) { if ((tca[TCA_CHAIN] && nla_get_u32(tca[TCA_CHAIN]) != chain->index)) continue; @@ -2342,19 +2985,20 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) index++; continue; } - if (tcf_chain_held_by_acts_only(chain)) - continue; - err = tc_chain_fill_node(chain, net, skb, block, + err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv, + chain->index, net, skb, block, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWCHAIN); - if (err <= 0) + if (err <= 0) { + tcf_chain_put(chain); break; + } index++; } if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) - tcf_block_refcnt_put(block); + tcf_block_refcnt_put(block, true); cb->args[0] = index; out: @@ -2376,7 +3020,7 @@ EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT { @@ -2386,7 +3030,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tp, tb[exts->police], rate_tlv, "police", ovr, - TCA_ACT_BIND, true, extack); + TCA_ACT_BIND, rtnl_held, + extack); if (IS_ERR(act)) return PTR_ERR(act); @@ -2398,8 +3043,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, err = tcf_action_init(net, tp, tb[exts->action], rate_tlv, NULL, ovr, TCA_ACT_BIND, - exts->actions, &attr_size, true, - extack); + exts->actions, &attr_size, + rtnl_held, extack); if (err < 0) return err; exts->nr_actions = err; @@ -2515,6 +3160,114 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, } EXPORT_SYMBOL(tc_setup_cb_call); +int tc_setup_flow_action(struct flow_action *flow_action, + const struct tcf_exts *exts) +{ + const struct tc_action *act; + int i, j, k; + + if (!exts) + return 0; + + j = 0; + tcf_exts_for_each_action(i, act, exts) { + struct flow_action_entry *entry; + + entry = &flow_action->entries[j]; + if (is_tcf_gact_ok(act)) { + entry->id = FLOW_ACTION_ACCEPT; + } else if (is_tcf_gact_shot(act)) { + entry->id = FLOW_ACTION_DROP; + } else if (is_tcf_gact_trap(act)) { + entry->id = FLOW_ACTION_TRAP; + } else if (is_tcf_gact_goto_chain(act)) { + entry->id = FLOW_ACTION_GOTO; + entry->chain_index = tcf_gact_goto_chain_index(act); + } else if (is_tcf_mirred_egress_redirect(act)) { + entry->id = FLOW_ACTION_REDIRECT; + entry->dev = tcf_mirred_dev(act); + } else if (is_tcf_mirred_egress_mirror(act)) { + entry->id = FLOW_ACTION_MIRRED; + entry->dev = tcf_mirred_dev(act); + } else if (is_tcf_vlan(act)) { + switch (tcf_vlan_action(act)) { + case TCA_VLAN_ACT_PUSH: + entry->id = FLOW_ACTION_VLAN_PUSH; + entry->vlan.vid = tcf_vlan_push_vid(act); + entry->vlan.proto = tcf_vlan_push_proto(act); + entry->vlan.prio = tcf_vlan_push_prio(act); + break; + case TCA_VLAN_ACT_POP: + entry->id = FLOW_ACTION_VLAN_POP; + break; + case TCA_VLAN_ACT_MODIFY: + entry->id = FLOW_ACTION_VLAN_MANGLE; + entry->vlan.vid = tcf_vlan_push_vid(act); + entry->vlan.proto = tcf_vlan_push_proto(act); + entry->vlan.prio = tcf_vlan_push_prio(act); + break; + default: + goto err_out; + } + } else if (is_tcf_tunnel_set(act)) { + entry->id = FLOW_ACTION_TUNNEL_ENCAP; + entry->tunnel = tcf_tunnel_info(act); + } else if (is_tcf_tunnel_release(act)) { + entry->id = FLOW_ACTION_TUNNEL_DECAP; + entry->tunnel = tcf_tunnel_info(act); + } else if (is_tcf_pedit(act)) { + for (k = 0; k < tcf_pedit_nkeys(act); k++) { + switch (tcf_pedit_cmd(act, k)) { + case TCA_PEDIT_KEY_EX_CMD_SET: + entry->id = FLOW_ACTION_MANGLE; + break; + case TCA_PEDIT_KEY_EX_CMD_ADD: + entry->id = FLOW_ACTION_ADD; + break; + default: + goto err_out; + } + entry->mangle.htype = tcf_pedit_htype(act, k); + entry->mangle.mask = tcf_pedit_mask(act, k); + entry->mangle.val = tcf_pedit_val(act, k); + entry->mangle.offset = tcf_pedit_offset(act, k); + entry = &flow_action->entries[++j]; + } + } else if (is_tcf_csum(act)) { + entry->id = FLOW_ACTION_CSUM; + entry->csum_flags = tcf_csum_update_flags(act); + } else if (is_tcf_skbedit_mark(act)) { + entry->id = FLOW_ACTION_MARK; + entry->mark = tcf_skbedit_mark(act); + } else { + goto err_out; + } + + if (!is_tcf_pedit(act)) + j++; + } + return 0; +err_out: + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(tc_setup_flow_action); + +unsigned int tcf_exts_num_actions(struct tcf_exts *exts) +{ + unsigned int num_acts = 0; + struct tc_action *act; + int i; + + tcf_exts_for_each_action(i, act, exts) { + if (is_tcf_pedit(act)) + num_acts += tcf_pedit_nkeys(act); + else + num_acts++; + } + return num_acts; +} +EXPORT_SYMBOL(tcf_exts_num_actions); + static __net_init int tcf_net_init(struct net *net) { struct tcf_net *tn = net_generic(net, tcf_net_id); @@ -2555,10 +3308,12 @@ static int __init tc_filter_init(void) if (err) goto err_rhash_setup_block_ht; - rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, + RTNL_FLAG_DOIT_UNLOCKED); + rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, + RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter, - tc_dump_tfilter, 0); + tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain, diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 4a57fec6f306..2383f449d2bc 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -107,7 +107,8 @@ static void basic_delete_filter_work(struct work_struct *work) rtnl_unlock(); } -static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void basic_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f, *n; @@ -126,7 +127,7 @@ static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int basic_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f = arg; @@ -153,7 +154,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack); if (err < 0) return err; @@ -173,7 +174,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, bool ovr, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { int err; struct basic_head *head = rtnl_dereference(tp->root); @@ -247,7 +248,8 @@ errout: return err; } -static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f; @@ -274,7 +276,7 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl) } static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tc_basic_pcnt gpf = {}; struct basic_filter *f = fh; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index a95cb240a606..062350c6621c 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -298,7 +298,7 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog, } static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); @@ -307,7 +307,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last, return 0; } -static void cls_bpf_destroy(struct tcf_proto *tp, +static void cls_bpf_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); @@ -417,7 +417,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; - ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, extack); + ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true, + extack); if (ret < 0) return ret; @@ -455,7 +456,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *oldprog = *arg; @@ -575,7 +577,7 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog, } static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *tm) + struct sk_buff *skb, struct tcmsg *tm, bool rtnl_held) { struct cls_bpf_prog *prog = fh; struct nlattr *nest; @@ -635,7 +637,8 @@ static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl) prog->res.class = cl; } -static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 3bc01bdde165..02b05066b635 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -78,7 +78,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work) static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, + void **arg, bool ovr, bool rtnl_held, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; @@ -110,7 +110,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, goto errout; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr, - extack); + true, extack); if (err < 0) goto errout; @@ -130,7 +130,7 @@ errout: return err; } -static void cls_cgroup_destroy(struct tcf_proto *tp, +static void cls_cgroup_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); @@ -145,18 +145,21 @@ static void cls_cgroup_destroy(struct tcf_proto *tp, } static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } -static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); if (arg->count < arg->skip) goto skip; + if (!head) + return; if (arg->fn(tp, head, arg) < 0) { arg->stop = 1; return; @@ -166,7 +169,7 @@ skip: } static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); struct nlattr *nest; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 2bb043cd436b..204e2edae8d5 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -391,7 +391,8 @@ static void flow_destroy_filter_work(struct work_struct *work) static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *fold, *fnew; @@ -445,7 +446,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, goto err2; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr, - extack); + true, extack); if (err < 0) goto err2; @@ -566,7 +567,7 @@ err1: } static int flow_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f = arg; @@ -590,7 +591,8 @@ static int flow_init(struct tcf_proto *tp) return 0; } -static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void flow_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f, *next; @@ -617,7 +619,7 @@ static void *flow_get(struct tcf_proto *tp, u32 handle) } static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct flow_filter *f = fh; struct nlattr *nest; @@ -677,7 +679,8 @@ nla_put_failure: return -1; } -static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index f6aa57fbbbaf..640f83e7f93f 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -381,16 +381,31 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, bool skip_sw = tc_skip_sw(f->flags); int err; + cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts)); + if (!cls_flower.rule) + return -ENOMEM; + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = TC_CLSFLOWER_REPLACE; cls_flower.cookie = (unsigned long) f; - cls_flower.dissector = &f->mask->dissector; - cls_flower.mask = &f->mask->key; - cls_flower.key = &f->mkey; - cls_flower.exts = &f->exts; + cls_flower.rule->match.dissector = &f->mask->dissector; + cls_flower.rule->match.mask = &f->mask->key; + cls_flower.rule->match.key = &f->mkey; cls_flower.classid = f->res.classid; + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); + if (err) { + kfree(cls_flower.rule); + if (skip_sw) { + NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); + return err; + } + return 0; + } + err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); + kfree(cls_flower.rule); + if (err < 0) { fl_hw_destroy_filter(tp, f, NULL); return err; @@ -413,10 +428,13 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL); cls_flower.command = TC_CLSFLOWER_STATS; cls_flower.cookie = (unsigned long) f; - cls_flower.exts = &f->exts; cls_flower.classid = f->res.classid; tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + + tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes, + cls_flower.stats.pkts, + cls_flower.stats.lastused); } static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, @@ -451,7 +469,8 @@ static void fl_destroy_sleepable(struct work_struct *work) module_put(THIS_MODULE); } -static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void fl_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct fl_flow_mask *mask, *next_mask; @@ -1258,7 +1277,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, + extack); if (err < 0) return err; @@ -1285,7 +1305,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, static int fl_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *fold = *arg; @@ -1371,7 +1392,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (!tc_skip_hw(fnew->flags)) { err = fl_hw_replace_filter(tp, fnew, extack); if (err) - goto errout_mask; + goto errout_mask_ht; } if (!tc_in_hw(fnew->flags)) @@ -1401,6 +1422,10 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, kfree(mask); return 0; +errout_mask_ht: + rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node, + fnew->mask->filter_ht_params); + errout_mask: fl_mask_put(head, fnew->mask, false); @@ -1418,7 +1443,7 @@ errout_mask_alloc: } static int fl_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f = arg; @@ -1430,7 +1455,8 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last, return 0; } -static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f; @@ -1463,18 +1489,36 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, if (tc_skip_hw(f->flags)) continue; + cls_flower.rule = + flow_rule_alloc(tcf_exts_num_actions(&f->exts)); + if (!cls_flower.rule) + return -ENOMEM; + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = add ? TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long)f; - cls_flower.dissector = &mask->dissector; - cls_flower.mask = &mask->key; - cls_flower.key = &f->mkey; - cls_flower.exts = &f->exts; + cls_flower.rule->match.dissector = &mask->dissector; + cls_flower.rule->match.mask = &mask->key; + cls_flower.rule->match.key = &f->mkey; + + err = tc_setup_flow_action(&cls_flower.rule->action, + &f->exts); + if (err) { + kfree(cls_flower.rule); + if (tc_skip_sw(f->flags)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); + return err; + } + continue; + } + cls_flower.classid = f->res.classid; err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv); + kfree(cls_flower.rule); + if (err) { if (add && tc_skip_sw(f->flags)) return err; @@ -1489,25 +1533,30 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, return 0; } -static void fl_hw_create_tmplt(struct tcf_chain *chain, - struct fl_flow_tmplt *tmplt) +static int fl_hw_create_tmplt(struct tcf_chain *chain, + struct fl_flow_tmplt *tmplt) { struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = chain->block; - struct tcf_exts dummy_exts = { 0, }; + + cls_flower.rule = flow_rule_alloc(0); + if (!cls_flower.rule) + return -ENOMEM; cls_flower.common.chain_index = chain->index; cls_flower.command = TC_CLSFLOWER_TMPLT_CREATE; cls_flower.cookie = (unsigned long) tmplt; - cls_flower.dissector = &tmplt->dissector; - cls_flower.mask = &tmplt->mask; - cls_flower.key = &tmplt->dummy_key; - cls_flower.exts = &dummy_exts; + cls_flower.rule->match.dissector = &tmplt->dissector; + cls_flower.rule->match.mask = &tmplt->mask; + cls_flower.rule->match.key = &tmplt->dummy_key; /* We don't care if driver (any of them) fails to handle this * call. It serves just as a hint for it. */ tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + kfree(cls_flower.rule); + + return 0; } static void fl_hw_destroy_tmplt(struct tcf_chain *chain, @@ -1551,12 +1600,14 @@ static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain, err = fl_set_key(net, tb, &tmplt->dummy_key, &tmplt->mask, extack); if (err) goto errout_tmplt; - kfree(tb); fl_init_dissector(&tmplt->dissector, &tmplt->mask); - fl_hw_create_tmplt(chain, tmplt); + err = fl_hw_create_tmplt(chain, tmplt); + if (err) + goto errout_tmplt; + kfree(tb); return tmplt; errout_tmplt: @@ -2004,7 +2055,7 @@ nla_put_failure: } static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct cls_fl_filter *f = fh; struct nlattr *nest; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 29eeeaf3ea44..4e34966f2ae2 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -139,7 +139,8 @@ static void fw_delete_filter_work(struct work_struct *work) rtnl_unlock(); } -static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void fw_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f; @@ -163,7 +164,7 @@ static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int fw_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = arg; @@ -217,7 +218,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, int err; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr, - extack); + true, extack); if (err < 0) return err; @@ -250,7 +251,8 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, - bool ovr, struct netlink_ext_ack *extack) + bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = *arg; @@ -354,15 +356,13 @@ errout: return err; } -static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct fw_head *head = rtnl_dereference(tp->root); int h; - if (head == NULL) - arg->stop = 1; - - if (arg->stop) + if (head == NULL || arg->stop) return; for (h = 0; h < HTSIZE; h++) { @@ -384,7 +384,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) } static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = fh; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index a1b803fd372e..1f9d481b0fbb 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -109,7 +109,8 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, return 0; } -static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void mall_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_mall_head *head = rtnl_dereference(tp->root); @@ -145,7 +146,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, { int err; - err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true, + extack); if (err < 0) return err; @@ -159,7 +161,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, static int mall_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct cls_mall_head *head = rtnl_dereference(tp->root); struct nlattr *tb[TCA_MATCHALL_MAX + 1]; @@ -232,17 +235,21 @@ err_exts_init: } static int mall_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } -static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct cls_mall_head *head = rtnl_dereference(tp->root); if (arg->count < arg->skip) goto skip; + + if (!head) + return; if (arg->fn(tp, head, arg) < 0) arg->stop = 1; skip: @@ -279,7 +286,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, } static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tc_matchall_pcnt gpf = {}; struct cls_mall_head *head = fh; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 0404aa5fa7cb..444d15a75d98 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -276,7 +276,8 @@ static void route4_queue_work(struct route4_filter *f) tcf_queue_work(&f->rwork, route4_delete_filter_work); } -static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void route4_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); int h1, h2; @@ -312,7 +313,7 @@ static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int route4_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter *f = arg; @@ -393,7 +394,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct route4_bucket *b; int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack); if (err < 0) return err; @@ -468,7 +469,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, bool ovr, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter __rcu **fp; @@ -560,15 +561,13 @@ errout: return err; } -static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct route4_head *head = rtnl_dereference(tp->root); unsigned int h, h1; - if (head == NULL) - arg->stop = 1; - - if (arg->stop) + if (head == NULL || arg->stop) return; for (h = 0; h <= 256; h++) { @@ -597,7 +596,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) } static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct route4_filter *f = fh; struct nlattr *nest; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index e9ccf7daea7d..4d3836178fa5 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -312,7 +312,8 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) __rsvp_delete_filter(f); } -static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void rsvp_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct rsvp_head *data = rtnl_dereference(tp->root); int h1, h2; @@ -341,7 +342,7 @@ static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct rsvp_head *head = rtnl_dereference(tp->root); struct rsvp_filter *nfp, *f = arg; @@ -477,7 +478,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, struct netlink_ext_ack *extack) + void **arg, bool ovr, bool rtnl_held, + struct netlink_ext_ack *extack) { struct rsvp_head *data = rtnl_dereference(tp->root); struct rsvp_filter *f, *nfp; @@ -502,7 +504,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true, + extack); if (err < 0) goto errout2; @@ -654,7 +657,8 @@ errout2: return err; } -static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct rsvp_head *head = rtnl_dereference(tp->root); unsigned int h, h1; @@ -688,7 +692,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) } static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct rsvp_filter *f = fh; struct rsvp_session *s; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 9ccc93f257db..e1981628047b 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -173,7 +173,7 @@ static void tcindex_destroy_fexts_work(struct work_struct *work) } static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = arg; @@ -226,7 +226,7 @@ static int tcindex_destroy_element(struct tcf_proto *tp, { bool last; - return tcindex_delete(tp, arg, &last, NULL); + return tcindex_delete(tp, arg, &last, false, NULL); } static void __tcindex_destroy(struct rcu_head *head) @@ -275,7 +275,7 @@ static void tcindex_free_perfect_hash(struct tcindex_data *cp) kfree(cp->perfect); } -static int tcindex_alloc_perfect_hash(struct tcindex_data *cp) +static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp) { int i, err = 0; @@ -289,6 +289,9 @@ static int tcindex_alloc_perfect_hash(struct tcindex_data *cp) TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) goto errout; +#ifdef CONFIG_NET_CLS_ACT + cp->perfect[i].exts.net = net; +#endif } return 0; @@ -305,16 +308,16 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct nlattr *est, bool ovr, struct netlink_ext_ack *extack) { struct tcindex_filter_result new_filter_result, *old_r = r; - struct tcindex_filter_result cr; struct tcindex_data *cp = NULL, *oldp; struct tcindex_filter *f = NULL; /* make gcc behave */ + struct tcf_result cr = {}; int err, balloc = 0; struct tcf_exts e; err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack); if (err < 0) goto errout; @@ -337,7 +340,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (p->perfect) { int i; - if (tcindex_alloc_perfect_hash(cp) < 0) + if (tcindex_alloc_perfect_hash(net, cp) < 0) goto errout; for (i = 0; i < cp->hash; i++) cp->perfect[i].res = p->perfect[i].res; @@ -348,11 +351,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = tcindex_filter_result_init(&new_filter_result); if (err < 0) goto errout1; - err = tcindex_filter_result_init(&cr); - if (err < 0) - goto errout1; if (old_r) - cr.res = r->res; + cr = r->res; if (tb[TCA_TCINDEX_HASH]) cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); @@ -406,7 +406,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = -ENOMEM; if (!cp->perfect && !cp->h) { if (valid_perfect_hash(cp)) { - if (tcindex_alloc_perfect_hash(cp) < 0) + if (tcindex_alloc_perfect_hash(net, cp) < 0) goto errout_alloc; balloc = 1; } else { @@ -443,8 +443,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } if (tb[TCA_TCINDEX_CLASSID]) { - cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]); - tcf_bind_filter(tp, &cr.res, base); + cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]); + tcf_bind_filter(tp, &cr, base); } if (old_r && old_r != r) { @@ -456,7 +456,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } oldp = p; - r->res = cr.res; + r->res = cr; tcf_exts_change(&r->exts, &e); rcu_assign_pointer(tp->root, cp); @@ -475,6 +475,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, ; /* nothing */ rcu_assign_pointer(*fp, f); + } else { + tcf_exts_destroy(&new_filter_result.exts); } if (oldp) @@ -487,7 +489,6 @@ errout_alloc: else if (balloc == 2) kfree(cp->h); errout1: - tcf_exts_destroy(&cr.exts); tcf_exts_destroy(&new_filter_result.exts); errout: kfree(cp); @@ -499,7 +500,7 @@ static int tcindex_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, bool ovr, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; @@ -522,7 +523,8 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, tca[TCA_RATE], ovr, extack); } -static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) +static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker, + bool rtnl_held) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter *f, *next; @@ -558,7 +560,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) } } -static void tcindex_destroy(struct tcf_proto *tp, +static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct tcindex_data *p = rtnl_dereference(tp->root); @@ -568,14 +570,14 @@ static void tcindex_destroy(struct tcf_proto *tp, walker.count = 0; walker.skip = 0; walker.fn = tcindex_destroy_element; - tcindex_walk(tp, &walker); + tcindex_walk(tp, &walker, true); call_rcu(&p->rcu, __tcindex_destroy); } static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = fh; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index dcea21004604..27d29c04dcc9 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -629,7 +629,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, return -ENOENT; } -static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) +static void u32_destroy(struct tcf_proto *tp, bool rtnl_held, + struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); @@ -663,7 +664,7 @@ static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) } static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, - struct netlink_ext_ack *extack) + bool rtnl_held, struct netlink_ext_ack *extack) { struct tc_u_hnode *ht = arg; struct tc_u_common *tp_c = tp->data; @@ -726,7 +727,7 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, { int err; - err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack); + err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack); if (err < 0) return err; @@ -858,7 +859,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr, + struct nlattr **tca, void **arg, bool ovr, bool rtnl_held, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; @@ -1123,7 +1124,8 @@ erridr: return err; } -static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) +static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg, + bool rtnl_held) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; @@ -1281,7 +1283,7 @@ static void u32_bind_class(void *fh, u32 classid, unsigned long cl) } static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tc_u_knode *n = fh; struct tc_u_hnode *ht_up, *ht_down; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 03e26e8d0ec9..b8a388e4bcc4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1201,9 +1201,11 @@ static struct Qdisc *qdisc_create(struct net_device *dev, } else { if (handle == 0) { handle = qdisc_alloc_handle(dev); - err = -ENOMEM; - if (handle == 0) + if (handle == 0) { + NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded"); + err = -ENOSPC; goto err_out3; + } } if (!netif_is_multiqueue(dev)) sch->flags |= TCQ_F_ONETXQUEUE; @@ -1909,17 +1911,19 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, block = cops->tcf_block(q, cl, NULL); if (!block) return; - list_for_each_entry(chain, &block->chain_list, list) { + for (chain = tcf_get_next_chain(block, NULL); + chain; + chain = tcf_get_next_chain(block, chain)) { struct tcf_proto *tp; - for (tp = rtnl_dereference(chain->filter_chain); - tp; tp = rtnl_dereference(tp->next)) { + for (tp = tcf_get_next_proto(chain, NULL, true); + tp; tp = tcf_get_next_proto(chain, tp, true)) { struct tcf_bind_args arg = {}; arg.w.fn = tcf_node_bind; arg.classid = clid; arg.cl = new_cl; - tp->ops->walk(tp, &arg.w); + tp->ops->walk(tp, &arg.w, true); } } } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 66ba2ce2320f..38e5add14fab 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -500,7 +500,7 @@ static void dev_watchdog_down(struct net_device *dev) * netif_carrier_on - set carrier * @dev: network device * - * Device has detected that carrier. + * Device has detected acquisition of carrier. */ void netif_carrier_on(struct net_device *dev) { @@ -1366,7 +1366,11 @@ static void mini_qdisc_rcu_func(struct rcu_head *head) void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, struct tcf_proto *tp_head) { - struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq); + /* Protected with chain0->filter_chain_lock. + * Can't access chain directly because tp_head can be NULL. + */ + struct mini_Qdisc *miniq_old = + rcu_dereference_protected(*miniqp->p_miniq, 1); struct mini_Qdisc *miniq; if (!tp_head) { diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 201c888604e4..d2c7d0d2abc1 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -101,7 +101,7 @@ static struct sctp_association *sctp_association_init( * socket values. */ asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt; - asoc->pf_retrans = net->sctp.pf_retrans; + asoc->pf_retrans = sp->pf_retrans; asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial); asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max); @@ -1651,8 +1651,11 @@ int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp) if (preload) idr_preload(gfp); spin_lock_bh(&sctp_assocs_id_lock); - /* 0 is not a valid assoc_id, must be >= 1 */ - ret = idr_alloc_cyclic(&sctp_assocs_id, asoc, 1, 0, GFP_NOWAIT); + /* 0, 1, 2 are used as SCTP_FUTURE_ASSOC, SCTP_CURRENT_ASSOC and + * SCTP_ALL_ASSOC, so an available id must be > SCTP_ALL_ASSOC. + */ + ret = idr_alloc_cyclic(&sctp_assocs_id, asoc, SCTP_ALL_ASSOC + 1, 0, + GFP_NOWAIT); spin_unlock_bh(&sctp_assocs_id_lock); if (preload) idr_preload_end(); diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 078f01a8d582..435847d98b51 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -256,6 +256,7 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc) + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(addrlen * asoc->peer.transport_count) + nla_total_size(addrlen * addrcnt) + nla_total_size(sizeof(struct inet_diag_meminfo)) diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 123e9f2dc226..edfcf16e704c 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -36,6 +36,7 @@ static __le32 sctp_gso_make_checksum(struct sk_buff *skb) { skb->ip_summed = CHECKSUM_NONE; skb->csum_not_inet = 0; + gso_reset_checksum(skb, ~0); return sctp_compute_cksum(skb, skb_transport_offset(skb)); } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index c37e1c2dec9d..fd33281999b5 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -212,7 +212,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) INIT_LIST_HEAD(&q->retransmit); INIT_LIST_HEAD(&q->sacked); INIT_LIST_HEAD(&q->abandoned); - sctp_sched_set_sched(asoc, SCTP_SS_DEFAULT); + sctp_sched_set_sched(asoc, sctp_sk(asoc->base.sk)->default_ss); } /* Free the outqueue structure and any related pending chunks. diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f93c3cf9e567..a78e55a1bb9c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -248,7 +248,7 @@ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id) } /* Otherwise this is a UDP-style socket. */ - if (!id || (id == (sctp_assoc_t)-1)) + if (id <= SCTP_ALL_ASSOC) return NULL; spin_lock_bh(&sctp_assocs_id_lock); @@ -2027,7 +2027,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_transport *transport = NULL; struct sctp_sndrcvinfo _sinfo, *sinfo; - struct sctp_association *asoc; + struct sctp_association *asoc, *tmp; struct sctp_cmsgs cmsgs; union sctp_addr *daddr; bool new = false; @@ -2053,7 +2053,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) /* SCTP_SENDALL process */ if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) { - list_for_each_entry(asoc, &ep->asocs, asocs) { + list_for_each_entry_safe(asoc, tmp, &ep->asocs, asocs) { err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len); if (err == 0) @@ -2750,12 +2750,13 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, return -EINVAL; } - /* Get association, if assoc_id != 0 and the socket is a one - * to many style socket, and an association was not found, then - * the id was invalid. + /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.spp_assoc_id); - if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) + if (!asoc && params.spp_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; /* Heartbeat demand can only be sent on a transport or @@ -2797,6 +2798,43 @@ static inline __u32 sctp_spp_sackdelay_disable(__u32 param_flags) return (param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_DISABLE; } +static void sctp_apply_asoc_delayed_ack(struct sctp_sack_info *params, + struct sctp_association *asoc) +{ + struct sctp_transport *trans; + + if (params->sack_delay) { + asoc->sackdelay = msecs_to_jiffies(params->sack_delay); + asoc->param_flags = + sctp_spp_sackdelay_enable(asoc->param_flags); + } + if (params->sack_freq == 1) { + asoc->param_flags = + sctp_spp_sackdelay_disable(asoc->param_flags); + } else if (params->sack_freq > 1) { + asoc->sackfreq = params->sack_freq; + asoc->param_flags = + sctp_spp_sackdelay_enable(asoc->param_flags); + } + + list_for_each_entry(trans, &asoc->peer.transport_addr_list, + transports) { + if (params->sack_delay) { + trans->sackdelay = msecs_to_jiffies(params->sack_delay); + trans->param_flags = + sctp_spp_sackdelay_enable(trans->param_flags); + } + if (params->sack_freq == 1) { + trans->param_flags = + sctp_spp_sackdelay_disable(trans->param_flags); + } else if (params->sack_freq > 1) { + trans->sackfreq = params->sack_freq; + trans->param_flags = + sctp_spp_sackdelay_enable(trans->param_flags); + } + } +} + /* * 7.1.23. Get or set delayed ack timer (SCTP_DELAYED_SACK) * @@ -2836,10 +2874,9 @@ static inline __u32 sctp_spp_sackdelay_disable(__u32 param_flags) static int sctp_setsockopt_delayed_ack(struct sock *sk, char __user *optval, unsigned int optlen) { - struct sctp_sack_info params; - struct sctp_transport *trans = NULL; - struct sctp_association *asoc = NULL; - struct sctp_sock *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); + struct sctp_association *asoc; + struct sctp_sack_info params; if (optlen == sizeof(struct sctp_sack_info)) { if (copy_from_user(¶ms, optval, optlen)) @@ -2867,67 +2904,42 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, if (params.sack_delay > 500) return -EINVAL; - /* Get association, if sack_assoc_id != 0 and the socket is a one - * to many style socket, and an association was not found, then - * the id was invalid. + /* Get association, if sack_assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.sack_assoc_id); - if (!asoc && params.sack_assoc_id && sctp_style(sk, UDP)) + if (!asoc && params.sack_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; - if (params.sack_delay) { - if (asoc) { - asoc->sackdelay = - msecs_to_jiffies(params.sack_delay); - asoc->param_flags = - sctp_spp_sackdelay_enable(asoc->param_flags); - } else { + if (asoc) { + sctp_apply_asoc_delayed_ack(¶ms, asoc); + + return 0; + } + + if (params.sack_assoc_id == SCTP_FUTURE_ASSOC || + params.sack_assoc_id == SCTP_ALL_ASSOC) { + if (params.sack_delay) { sp->sackdelay = params.sack_delay; sp->param_flags = sctp_spp_sackdelay_enable(sp->param_flags); } - } - - if (params.sack_freq == 1) { - if (asoc) { - asoc->param_flags = - sctp_spp_sackdelay_disable(asoc->param_flags); - } else { + if (params.sack_freq == 1) { sp->param_flags = sctp_spp_sackdelay_disable(sp->param_flags); - } - } else if (params.sack_freq > 1) { - if (asoc) { - asoc->sackfreq = params.sack_freq; - asoc->param_flags = - sctp_spp_sackdelay_enable(asoc->param_flags); - } else { + } else if (params.sack_freq > 1) { sp->sackfreq = params.sack_freq; sp->param_flags = sctp_spp_sackdelay_enable(sp->param_flags); } } - /* If change is for association, also apply to each transport. */ - if (asoc) { - list_for_each_entry(trans, &asoc->peer.transport_addr_list, - transports) { - if (params.sack_delay) { - trans->sackdelay = - msecs_to_jiffies(params.sack_delay); - trans->param_flags = - sctp_spp_sackdelay_enable(trans->param_flags); - } - if (params.sack_freq == 1) { - trans->param_flags = - sctp_spp_sackdelay_disable(trans->param_flags); - } else if (params.sack_freq > 1) { - trans->sackfreq = params.sack_freq; - trans->param_flags = - sctp_spp_sackdelay_enable(trans->param_flags); - } - } - } + if (params.sack_assoc_id == SCTP_CURRENT_ASSOC || + params.sack_assoc_id == SCTP_ALL_ASSOC) + list_for_each_entry(asoc, &sp->ep->asocs, asocs) + sctp_apply_asoc_delayed_ack(¶ms, asoc); return 0; } @@ -2997,15 +3009,22 @@ static int sctp_setsockopt_default_send_param(struct sock *sk, return -EINVAL; asoc = sctp_id2assoc(sk, info.sinfo_assoc_id); - if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP)) + if (!asoc && info.sinfo_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; + if (asoc) { asoc->default_stream = info.sinfo_stream; asoc->default_flags = info.sinfo_flags; asoc->default_ppid = info.sinfo_ppid; asoc->default_context = info.sinfo_context; asoc->default_timetolive = info.sinfo_timetolive; - } else { + + return 0; + } + + if (info.sinfo_assoc_id == SCTP_FUTURE_ASSOC || + info.sinfo_assoc_id == SCTP_ALL_ASSOC) { sp->default_stream = info.sinfo_stream; sp->default_flags = info.sinfo_flags; sp->default_ppid = info.sinfo_ppid; @@ -3013,6 +3032,17 @@ static int sctp_setsockopt_default_send_param(struct sock *sk, sp->default_timetolive = info.sinfo_timetolive; } + if (info.sinfo_assoc_id == SCTP_CURRENT_ASSOC || + info.sinfo_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + asoc->default_stream = info.sinfo_stream; + asoc->default_flags = info.sinfo_flags; + asoc->default_ppid = info.sinfo_ppid; + asoc->default_context = info.sinfo_context; + asoc->default_timetolive = info.sinfo_timetolive; + } + } + return 0; } @@ -3037,20 +3067,37 @@ static int sctp_setsockopt_default_sndinfo(struct sock *sk, return -EINVAL; asoc = sctp_id2assoc(sk, info.snd_assoc_id); - if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP)) + if (!asoc && info.snd_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; + if (asoc) { asoc->default_stream = info.snd_sid; asoc->default_flags = info.snd_flags; asoc->default_ppid = info.snd_ppid; asoc->default_context = info.snd_context; - } else { + + return 0; + } + + if (info.snd_assoc_id == SCTP_FUTURE_ASSOC || + info.snd_assoc_id == SCTP_ALL_ASSOC) { sp->default_stream = info.snd_sid; sp->default_flags = info.snd_flags; sp->default_ppid = info.snd_ppid; sp->default_context = info.snd_context; } + if (info.snd_assoc_id == SCTP_CURRENT_ASSOC || + info.snd_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + asoc->default_stream = info.snd_sid; + asoc->default_flags = info.snd_flags; + asoc->default_ppid = info.snd_ppid; + asoc->default_context = info.snd_context; + } + } + return 0; } @@ -3144,7 +3191,8 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id); /* Set the values to the specific association */ - if (!asoc && rtoinfo.srto_assoc_id && sctp_style(sk, UDP)) + if (!asoc && rtoinfo.srto_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; rto_max = rtoinfo.srto_max; @@ -3206,7 +3254,8 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, unsig asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id); - if (!asoc && assocparams.sasoc_assoc_id && sctp_style(sk, UDP)) + if (!asoc && assocparams.sasoc_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; /* Set the values to the specific association */ @@ -3319,7 +3368,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned current->comm, task_pid_nr(current)); if (copy_from_user(&val, optval, optlen)) return -EFAULT; - params.assoc_id = 0; + params.assoc_id = SCTP_FUTURE_ASSOC; } else if (optlen == sizeof(struct sctp_assoc_value)) { if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; @@ -3329,6 +3378,9 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned } asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; if (val) { int min_len, max_len; @@ -3346,8 +3398,6 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned asoc->user_frag = val; sctp_assoc_update_frag_point(asoc); } else { - if (params.assoc_id && sctp_style(sk, UDP)) - return -EINVAL; sp->user_frag = val; } @@ -3460,8 +3510,8 @@ static int sctp_setsockopt_adaptation_layer(struct sock *sk, char __user *optval static int sctp_setsockopt_context(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_assoc_value params; - struct sctp_sock *sp; struct sctp_association *asoc; if (optlen != sizeof(struct sctp_assoc_value)) @@ -3469,17 +3519,26 @@ static int sctp_setsockopt_context(struct sock *sk, char __user *optval, if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; - sp = sctp_sk(sk); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; - if (params.assoc_id != 0) { - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) - return -EINVAL; + if (asoc) { asoc->default_rcv_context = params.assoc_value; - } else { - sp->default_rcv_context = params.assoc_value; + + return 0; } + if (params.assoc_id == SCTP_FUTURE_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + sp->default_rcv_context = params.assoc_value; + + if (params.assoc_id == SCTP_CURRENT_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + list_for_each_entry(asoc, &sp->ep->asocs, asocs) + asoc->default_rcv_context = params.assoc_value; + return 0; } @@ -3580,11 +3639,9 @@ static int sctp_setsockopt_maxburst(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_assoc_value params; - struct sctp_sock *sp; struct sctp_association *asoc; - int val; - int assoc_id = 0; if (optlen == sizeof(int)) { pr_warn_ratelimited(DEPRECATED @@ -3592,25 +3649,34 @@ static int sctp_setsockopt_maxburst(struct sock *sk, "Use of int in max_burst socket option deprecated.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); - if (copy_from_user(&val, optval, optlen)) + if (copy_from_user(¶ms.assoc_value, optval, optlen)) return -EFAULT; + params.assoc_id = SCTP_FUTURE_ASSOC; } else if (optlen == sizeof(struct sctp_assoc_value)) { if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; - val = params.assoc_value; - assoc_id = params.assoc_id; } else return -EINVAL; - sp = sctp_sk(sk); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; - if (assoc_id != 0) { - asoc = sctp_id2assoc(sk, assoc_id); - if (!asoc) - return -EINVAL; - asoc->max_burst = val; - } else - sp->max_burst = val; + if (asoc) { + asoc->max_burst = params.assoc_value; + + return 0; + } + + if (params.assoc_id == SCTP_FUTURE_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + sp->max_burst = params.assoc_value; + + if (params.assoc_id == SCTP_CURRENT_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + list_for_each_entry(asoc, &sp->ep->asocs, asocs) + asoc->max_burst = params.assoc_value; return 0; } @@ -3702,7 +3768,7 @@ static int sctp_setsockopt_auth_key(struct sock *sk, struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authkey *authkey; struct sctp_association *asoc; - int ret; + int ret = -EINVAL; if (!ep->auth_enable) return -EACCES; @@ -3712,25 +3778,44 @@ static int sctp_setsockopt_auth_key(struct sock *sk, /* authkey->sca_keylength is u16, so optlen can't be bigger than * this. */ - optlen = min_t(unsigned int, optlen, USHRT_MAX + - sizeof(struct sctp_authkey)); + optlen = min_t(unsigned int, optlen, USHRT_MAX + sizeof(*authkey)); authkey = memdup_user(optval, optlen); if (IS_ERR(authkey)) return PTR_ERR(authkey); - if (authkey->sca_keylength > optlen - sizeof(struct sctp_authkey)) { - ret = -EINVAL; + if (authkey->sca_keylength > optlen - sizeof(*authkey)) goto out; - } asoc = sctp_id2assoc(sk, authkey->sca_assoc_id); - if (!asoc && authkey->sca_assoc_id && sctp_style(sk, UDP)) { - ret = -EINVAL; + if (!asoc && authkey->sca_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + goto out; + + if (asoc) { + ret = sctp_auth_set_key(ep, asoc, authkey); goto out; } - ret = sctp_auth_set_key(ep, asoc, authkey); + if (authkey->sca_assoc_id == SCTP_FUTURE_ASSOC || + authkey->sca_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_set_key(ep, asoc, authkey); + if (ret) + goto out; + } + + ret = 0; + + if (authkey->sca_assoc_id == SCTP_CURRENT_ASSOC || + authkey->sca_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + int res = sctp_auth_set_key(ep, asoc, authkey); + + if (res && !ret) + ret = res; + } + } + out: kzfree(authkey); return ret; @@ -3747,8 +3832,9 @@ static int sctp_setsockopt_active_key(struct sock *sk, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct sctp_authkeyid val; struct sctp_association *asoc; + struct sctp_authkeyid val; + int ret = 0; if (!ep->auth_enable) return -EACCES; @@ -3759,10 +3845,32 @@ static int sctp_setsockopt_active_key(struct sock *sk, return -EFAULT; asoc = sctp_id2assoc(sk, val.scact_assoc_id); - if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; - return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); + if (asoc) + return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); + + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); + if (ret) + return ret; + } + + if (val.scact_assoc_id == SCTP_CURRENT_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + int res = sctp_auth_set_active_key(ep, asoc, + val.scact_keynumber); + + if (res && !ret) + ret = res; + } + } + + return ret; } /* @@ -3775,8 +3883,9 @@ static int sctp_setsockopt_del_key(struct sock *sk, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct sctp_authkeyid val; struct sctp_association *asoc; + struct sctp_authkeyid val; + int ret = 0; if (!ep->auth_enable) return -EACCES; @@ -3787,11 +3896,32 @@ static int sctp_setsockopt_del_key(struct sock *sk, return -EFAULT; asoc = sctp_id2assoc(sk, val.scact_assoc_id); - if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; - return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); + if (asoc) + return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); + + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); + if (ret) + return ret; + } + + if (val.scact_assoc_id == SCTP_CURRENT_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + int res = sctp_auth_del_key_id(ep, asoc, + val.scact_keynumber); + if (res && !ret) + ret = res; + } + } + + return ret; } /* @@ -3803,8 +3933,9 @@ static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval, unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct sctp_authkeyid val; struct sctp_association *asoc; + struct sctp_authkeyid val; + int ret = 0; if (!ep->auth_enable) return -EACCES; @@ -3815,10 +3946,32 @@ static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval, return -EFAULT; asoc = sctp_id2assoc(sk, val.scact_assoc_id); - if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; - return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); + if (asoc) + return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); + + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); + if (ret) + return ret; + } + + if (val.scact_assoc_id == SCTP_CURRENT_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + int res = sctp_auth_deact_key_id(ep, asoc, + val.scact_keynumber); + + if (res && !ret) + ret = res; + } + } + + return ret; } /* @@ -3884,11 +4037,25 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk, sizeof(struct sctp_paddrthlds))) return -EFAULT; - - if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { - asoc = sctp_id2assoc(sk, val.spt_assoc_id); - if (!asoc) + if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { + trans = sctp_addr_id2transport(sk, &val.spt_address, + val.spt_assoc_id); + if (!trans) return -ENOENT; + + if (val.spt_pathmaxrxt) + trans->pathmaxrxt = val.spt_pathmaxrxt; + trans->pf_retrans = val.spt_pathpfthld; + + return 0; + } + + asoc = sctp_id2assoc(sk, val.spt_assoc_id); + if (!asoc && val.spt_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) { list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) { if (val.spt_pathmaxrxt) @@ -3900,14 +4067,11 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk, asoc->pathmaxrxt = val.spt_pathmaxrxt; asoc->pf_retrans = val.spt_pathpfthld; } else { - trans = sctp_addr_id2transport(sk, &val.spt_address, - val.spt_assoc_id); - if (!trans) - return -ENOENT; + struct sctp_sock *sp = sctp_sk(sk); if (val.spt_pathmaxrxt) - trans->pathmaxrxt = val.spt_pathmaxrxt; - trans->pf_retrans = val.spt_pathpfthld; + sp->pathmaxrxt = val.spt_pathmaxrxt; + sp->pf_retrans = val.spt_pathpfthld; } return 0; @@ -3950,6 +4114,7 @@ static int sctp_setsockopt_pr_supported(struct sock *sk, unsigned int optlen) { struct sctp_assoc_value params; + struct sctp_association *asoc; if (optlen != sizeof(params)) return -EINVAL; @@ -3957,6 +4122,11 @@ static int sctp_setsockopt_pr_supported(struct sock *sk, if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + sctp_sk(sk)->ep->prsctp_enable = !!params.assoc_value; return 0; @@ -3966,6 +4136,7 @@ static int sctp_setsockopt_default_prinfo(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_default_prinfo info; struct sctp_association *asoc; int retval = -EINVAL; @@ -3985,19 +4156,31 @@ static int sctp_setsockopt_default_prinfo(struct sock *sk, info.pr_value = 0; asoc = sctp_id2assoc(sk, info.pr_assoc_id); + if (!asoc && info.pr_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + goto out; + + retval = 0; + if (asoc) { SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy); asoc->default_timetolive = info.pr_value; - } else if (!info.pr_assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); + goto out; + } + if (info.pr_assoc_id == SCTP_FUTURE_ASSOC || + info.pr_assoc_id == SCTP_ALL_ASSOC) { SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy); sp->default_timetolive = info.pr_value; - } else { - goto out; } - retval = 0; + if (info.pr_assoc_id == SCTP_CURRENT_ASSOC || + info.pr_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy); + asoc->default_timetolive = info.pr_value; + } + } out: return retval; @@ -4020,15 +4203,14 @@ static int sctp_setsockopt_reconfig_supported(struct sock *sk, } asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - asoc->reconf_enable = !!params.assoc_value; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - sp->ep->reconf_enable = !!params.assoc_value; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) goto out; - } + + if (asoc) + asoc->reconf_enable = !!params.assoc_value; + else + sctp_sk(sk)->ep->reconf_enable = !!params.assoc_value; retval = 0; @@ -4040,6 +4222,7 @@ static int sctp_setsockopt_enable_strreset(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_assoc_value params; struct sctp_association *asoc; int retval = -EINVAL; @@ -4056,17 +4239,25 @@ static int sctp_setsockopt_enable_strreset(struct sock *sk, goto out; asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + goto out; + + retval = 0; + if (asoc) { asoc->strreset_enable = params.assoc_value; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - sp->ep->strreset_enable = params.assoc_value; - } else { goto out; } - retval = 0; + if (params.assoc_id == SCTP_FUTURE_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + ep->strreset_enable = params.assoc_value; + + if (params.assoc_id == SCTP_CURRENT_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + list_for_each_entry(asoc, &ep->asocs, asocs) + asoc->strreset_enable = params.assoc_value; out: return retval; @@ -4161,29 +4352,44 @@ static int sctp_setsockopt_scheduler(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; struct sctp_assoc_value params; - int retval = -EINVAL; + int retval = 0; if (optlen < sizeof(params)) - goto out; + return -EINVAL; optlen = sizeof(params); - if (copy_from_user(¶ms, optval, optlen)) { - retval = -EFAULT; - goto out; - } + if (copy_from_user(¶ms, optval, optlen)) + return -EFAULT; if (params.assoc_value > SCTP_SS_MAX) - goto out; + return -EINVAL; asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) - goto out; + if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) + return sctp_sched_set_sched(asoc, params.assoc_value); - retval = sctp_sched_set_sched(asoc, params.assoc_value); + if (params.assoc_id == SCTP_FUTURE_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + sp->default_ss = params.assoc_value; + + if (params.assoc_id == SCTP_CURRENT_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + int ret = sctp_sched_set_sched(asoc, + params.assoc_value); + + if (ret && !retval) + retval = ret; + } + } -out: return retval; } @@ -4191,8 +4397,8 @@ static int sctp_setsockopt_scheduler_value(struct sock *sk, char __user *optval, unsigned int optlen) { - struct sctp_association *asoc; struct sctp_stream_value params; + struct sctp_association *asoc; int retval = -EINVAL; if (optlen < sizeof(params)) @@ -4205,11 +4411,24 @@ static int sctp_setsockopt_scheduler_value(struct sock *sk, } asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) + if (!asoc && params.assoc_id != SCTP_CURRENT_ASSOC && + sctp_style(sk, UDP)) goto out; - retval = sctp_sched_set_value(asoc, params.stream_id, - params.stream_value, GFP_KERNEL); + if (asoc) { + retval = sctp_sched_set_value(asoc, params.stream_id, + params.stream_value, GFP_KERNEL); + goto out; + } + + retval = 0; + + list_for_each_entry(asoc, &sctp_sk(sk)->ep->asocs, asocs) { + int ret = sctp_sched_set_value(asoc, params.stream_id, + params.stream_value, GFP_KERNEL); + if (ret && !retval) /* try to return the 1st error. */ + retval = ret; + } out: return retval; @@ -4220,8 +4439,8 @@ static int sctp_setsockopt_interleaving_supported(struct sock *sk, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); - struct net *net = sock_net(sk); struct sctp_assoc_value params; + struct sctp_association *asoc; int retval = -EINVAL; if (optlen < sizeof(params)) @@ -4233,10 +4452,12 @@ static int sctp_setsockopt_interleaving_supported(struct sock *sk, goto out; } - if (params.assoc_id) + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) goto out; - if (!net->sctp.intl_enable || !sp->frag_interleave) { + if (!sock_net(sk)->sctp.intl_enable || !sp->frag_interleave) { retval = -EPERM; goto out; } @@ -4271,54 +4492,69 @@ static int sctp_setsockopt_reuse_port(struct sock *sk, char __user *optval, return 0; } +static int sctp_assoc_ulpevent_type_set(struct sctp_event *param, + struct sctp_association *asoc) +{ + struct sctp_ulpevent *event; + + sctp_ulpevent_type_set(&asoc->subscribe, param->se_type, param->se_on); + + if (param->se_type == SCTP_SENDER_DRY_EVENT && param->se_on) { + if (sctp_outq_is_empty(&asoc->outqueue)) { + event = sctp_ulpevent_make_sender_dry_event(asoc, + GFP_USER | __GFP_NOWARN); + if (!event) + return -ENOMEM; + + asoc->stream.si->enqueue_event(&asoc->ulpq, event); + } + } + + return 0; +} + static int sctp_setsockopt_event(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; - struct sctp_ulpevent *event; struct sctp_event param; int retval = 0; - if (optlen < sizeof(param)) { - retval = -EINVAL; - goto out; - } + if (optlen < sizeof(param)) + return -EINVAL; optlen = sizeof(param); - if (copy_from_user(¶m, optval, optlen)) { - retval = -EFAULT; - goto out; - } + if (copy_from_user(¶m, optval, optlen)) + return -EFAULT; if (param.se_type < SCTP_SN_TYPE_BASE || - param.se_type > SCTP_SN_TYPE_MAX) { - retval = -EINVAL; - goto out; - } + param.se_type > SCTP_SN_TYPE_MAX) + return -EINVAL; asoc = sctp_id2assoc(sk, param.se_assoc_id); - if (!asoc) { - sctp_ulpevent_type_set(&sctp_sk(sk)->subscribe, - param.se_type, param.se_on); - goto out; - } + if (!asoc && param.se_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; - sctp_ulpevent_type_set(&asoc->subscribe, param.se_type, param.se_on); + if (asoc) + return sctp_assoc_ulpevent_type_set(¶m, asoc); - if (param.se_type == SCTP_SENDER_DRY_EVENT && param.se_on) { - if (sctp_outq_is_empty(&asoc->outqueue)) { - event = sctp_ulpevent_make_sender_dry_event(asoc, - GFP_USER | __GFP_NOWARN); - if (!event) { - retval = -ENOMEM; - goto out; - } + if (param.se_assoc_id == SCTP_FUTURE_ASSOC || + param.se_assoc_id == SCTP_ALL_ASSOC) + sctp_ulpevent_type_set(&sp->subscribe, + param.se_type, param.se_on); - asoc->stream.si->enqueue_event(&asoc->ulpq, event); + if (param.se_assoc_id == SCTP_CURRENT_ASSOC || + param.se_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + int ret = sctp_assoc_ulpevent_type_set(¶m, asoc); + + if (ret && !retval) + retval = ret; } } -out: return retval; } @@ -4777,12 +5013,14 @@ static int sctp_init_sock(struct sock *sk) */ sp->hbinterval = net->sctp.hb_interval; sp->pathmaxrxt = net->sctp.max_retrans_path; + sp->pf_retrans = net->sctp.pf_retrans; sp->pathmtu = 0; /* allow default discovery */ sp->sackdelay = net->sctp.sack_timeout; sp->sackfreq = 2; sp->param_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; + sp->default_ss = SCTP_SS_DEFAULT; /* If enabled no SCTP message fragmentation will be performed. * Configure through SCTP_DISABLE_FRAGMENTS socket option. @@ -5676,12 +5914,13 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, } } - /* Get association, if assoc_id != 0 and the socket is a one - * to many style socket, and an association was not found, then - * the id was invalid. + /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.spp_assoc_id); - if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) { + if (!asoc && params.spp_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { pr_debug("%s: failed no association\n", __func__); return -EINVAL; } @@ -5810,19 +6049,19 @@ static int sctp_getsockopt_delayed_ack(struct sock *sk, int len, } else return -EINVAL; - /* Get association, if sack_assoc_id != 0 and the socket is a one - * to many style socket, and an association was not found, then - * the id was invalid. + /* Get association, if sack_assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.sack_assoc_id); - if (!asoc && params.sack_assoc_id && sctp_style(sk, UDP)) + if (!asoc && params.sack_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; if (asoc) { /* Fetch association values. */ if (asoc->param_flags & SPP_SACKDELAY_ENABLE) { - params.sack_delay = jiffies_to_msecs( - asoc->sackdelay); + params.sack_delay = jiffies_to_msecs(asoc->sackdelay); params.sack_freq = asoc->sackfreq; } else { @@ -6175,8 +6414,10 @@ static int sctp_getsockopt_default_send_param(struct sock *sk, return -EFAULT; asoc = sctp_id2assoc(sk, info.sinfo_assoc_id); - if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP)) + if (!asoc && info.sinfo_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; + if (asoc) { info.sinfo_stream = asoc->default_stream; info.sinfo_flags = asoc->default_flags; @@ -6219,8 +6460,10 @@ static int sctp_getsockopt_default_sndinfo(struct sock *sk, int len, return -EFAULT; asoc = sctp_id2assoc(sk, info.snd_assoc_id); - if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP)) + if (!asoc && info.snd_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; + if (asoc) { info.snd_sid = asoc->default_stream; info.snd_flags = asoc->default_flags; @@ -6296,7 +6539,8 @@ static int sctp_getsockopt_rtoinfo(struct sock *sk, int len, asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id); - if (!asoc && rtoinfo.srto_assoc_id && sctp_style(sk, UDP)) + if (!asoc && rtoinfo.srto_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; /* Values corresponding to the specific association. */ @@ -6353,7 +6597,8 @@ static int sctp_getsockopt_associnfo(struct sock *sk, int len, asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id); - if (!asoc && assocparams.sasoc_assoc_id && sctp_style(sk, UDP)) + if (!asoc && assocparams.sasoc_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; /* Values correspoinding to the specific association */ @@ -6428,7 +6673,6 @@ static int sctp_getsockopt_context(struct sock *sk, int len, char __user *optval, int __user *optlen) { struct sctp_assoc_value params; - struct sctp_sock *sp; struct sctp_association *asoc; if (len < sizeof(struct sctp_assoc_value)) @@ -6439,16 +6683,13 @@ static int sctp_getsockopt_context(struct sock *sk, int len, if (copy_from_user(¶ms, optval, len)) return -EFAULT; - sp = sctp_sk(sk); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; - if (params.assoc_id != 0) { - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) - return -EINVAL; - params.assoc_value = asoc->default_rcv_context; - } else { - params.assoc_value = sp->default_rcv_context; - } + params.assoc_value = asoc ? asoc->default_rcv_context + : sctp_sk(sk)->default_rcv_context; if (put_user(len, optlen)) return -EFAULT; @@ -6497,7 +6738,7 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len, "Use of int in maxseg socket option.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); - params.assoc_id = 0; + params.assoc_id = SCTP_FUTURE_ASSOC; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); if (copy_from_user(¶ms, optval, len)) @@ -6506,7 +6747,8 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len, return -EINVAL; asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id && sctp_style(sk, UDP)) + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; if (asoc) @@ -6583,7 +6825,6 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, int __user *optlen) { struct sctp_assoc_value params; - struct sctp_sock *sp; struct sctp_association *asoc; if (len == sizeof(int)) { @@ -6592,7 +6833,7 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, "Use of int in max_burst socket option.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); - params.assoc_id = 0; + params.assoc_id = SCTP_FUTURE_ASSOC; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); if (copy_from_user(¶ms, optval, len)) @@ -6600,15 +6841,12 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, } else return -EINVAL; - sp = sctp_sk(sk); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; - if (params.assoc_id != 0) { - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) - return -EINVAL; - params.assoc_value = asoc->max_burst; - } else - params.assoc_value = sp->max_burst; + params.assoc_value = asoc ? asoc->max_burst : sctp_sk(sk)->max_burst; if (len == sizeof(int)) { if (copy_to_user(optval, ¶ms.assoc_value, len)) @@ -6759,14 +6997,12 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, to = p->gauth_chunks; asoc = sctp_id2assoc(sk, val.gauth_assoc_id); - if (!asoc && val.gauth_assoc_id && sctp_style(sk, UDP)) + if (!asoc && val.gauth_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; - if (asoc) - ch = (struct sctp_chunks_param *)asoc->c.auth_chunks; - else - ch = ep->auth_chunk_list; - + ch = asoc ? (struct sctp_chunks_param *)asoc->c.auth_chunks + : ep->auth_chunk_list; if (!ch) goto num; @@ -6911,14 +7147,7 @@ static int sctp_getsockopt_paddr_thresholds(struct sock *sk, if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, len)) return -EFAULT; - if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { - asoc = sctp_id2assoc(sk, val.spt_assoc_id); - if (!asoc) - return -ENOENT; - - val.spt_pathpfthld = asoc->pf_retrans; - val.spt_pathmaxrxt = asoc->pathmaxrxt; - } else { + if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { trans = sctp_addr_id2transport(sk, &val.spt_address, val.spt_assoc_id); if (!trans) @@ -6926,6 +7155,23 @@ static int sctp_getsockopt_paddr_thresholds(struct sock *sk, val.spt_pathmaxrxt = trans->pathmaxrxt; val.spt_pathpfthld = trans->pf_retrans; + + return 0; + } + + asoc = sctp_id2assoc(sk, val.spt_assoc_id); + if (!asoc && val.spt_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) { + val.spt_pathpfthld = asoc->pf_retrans; + val.spt_pathmaxrxt = asoc->pathmaxrxt; + } else { + struct sctp_sock *sp = sctp_sk(sk); + + val.spt_pathpfthld = sp->pf_retrans; + val.spt_pathmaxrxt = sp->pathmaxrxt; } if (put_user(len, optlen) || copy_to_user(optval, &val, len)) @@ -7056,17 +7302,15 @@ static int sctp_getsockopt_pr_supported(struct sock *sk, int len, goto out; asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - params.assoc_value = asoc->prsctp_enable; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - params.assoc_value = sp->ep->prsctp_enable; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } + params.assoc_value = asoc ? asoc->prsctp_enable + : sctp_sk(sk)->ep->prsctp_enable; + if (put_user(len, optlen)) goto out; @@ -7097,17 +7341,20 @@ static int sctp_getsockopt_default_prinfo(struct sock *sk, int len, goto out; asoc = sctp_id2assoc(sk, info.pr_assoc_id); + if (!asoc && info.pr_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { + retval = -EINVAL; + goto out; + } + if (asoc) { info.pr_policy = SCTP_PR_POLICY(asoc->default_flags); info.pr_value = asoc->default_timetolive; - } else if (!info.pr_assoc_id) { + } else { struct sctp_sock *sp = sctp_sk(sk); info.pr_policy = SCTP_PR_POLICY(sp->default_flags); info.pr_value = sp->default_timetolive; - } else { - retval = -EINVAL; - goto out; } if (put_user(len, optlen)) @@ -7263,17 +7510,15 @@ static int sctp_getsockopt_reconfig_supported(struct sock *sk, int len, goto out; asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - params.assoc_value = asoc->reconf_enable; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - params.assoc_value = sp->ep->reconf_enable; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } + params.assoc_value = asoc ? asoc->reconf_enable + : sctp_sk(sk)->ep->reconf_enable; + if (put_user(len, optlen)) goto out; @@ -7304,17 +7549,15 @@ static int sctp_getsockopt_enable_strreset(struct sock *sk, int len, goto out; asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - params.assoc_value = asoc->strreset_enable; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - params.assoc_value = sp->ep->strreset_enable; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } + params.assoc_value = asoc ? asoc->strreset_enable + : sctp_sk(sk)->ep->strreset_enable; + if (put_user(len, optlen)) goto out; @@ -7345,12 +7588,14 @@ static int sctp_getsockopt_scheduler(struct sock *sk, int len, goto out; asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } - params.assoc_value = sctp_sched_get_sched(asoc); + params.assoc_value = asoc ? sctp_sched_get_sched(asoc) + : sctp_sk(sk)->default_ss; if (put_user(len, optlen)) goto out; @@ -7424,17 +7669,15 @@ static int sctp_getsockopt_interleaving_supported(struct sock *sk, int len, goto out; asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - params.assoc_value = asoc->intl_enable; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - params.assoc_value = sp->strm_interleave; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; } + params.assoc_value = asoc ? asoc->intl_enable + : sctp_sk(sk)->strm_interleave; + if (put_user(len, optlen)) goto out; @@ -7486,6 +7729,10 @@ static int sctp_getsockopt_event(struct sock *sk, int len, char __user *optval, return -EINVAL; asoc = sctp_id2assoc(sk, param.se_assoc_id); + if (!asoc && param.se_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + subscribe = asoc ? asoc->subscribe : sctp_sk(sk)->subscribe; param.se_on = sctp_ulpevent_type_enabled(subscribe, param.se_type); diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 80e0ae5534ec..2936ed17bf9e 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -84,6 +84,19 @@ static void fa_zero(struct flex_array *fa, size_t index, size_t count) } } +static size_t fa_index(struct flex_array *fa, void *elem, size_t count) +{ + size_t index = 0; + + while (count--) { + if (elem == flex_array_get(fa, index)) + break; + index++; + } + + return index; +} + /* Migrates chunks from stream queues to new stream queues if needed, * but not across associations. Also, removes those chunks to streams * higher than the new max. @@ -131,8 +144,10 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, } } - for (i = outcnt; i < stream->outcnt; i++) + for (i = outcnt; i < stream->outcnt; i++) { kfree(SCTP_SO(stream, i)->ext); + SCTP_SO(stream, i)->ext = NULL; + } } static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, @@ -147,6 +162,13 @@ static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, if (stream->out) { fa_copy(out, stream->out, 0, min(outcnt, stream->outcnt)); + if (stream->out_curr) { + size_t index = fa_index(stream->out, stream->out_curr, + stream->outcnt); + + BUG_ON(index == stream->outcnt); + stream->out_curr = flex_array_get(out, index); + } fa_free(stream->out); } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c4e56602e0c6..46fa9f3016cc 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -42,8 +42,11 @@ #include "smc_rx.h" #include "smc_close.h" -static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group - * creation +static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group + * creation on server + */ +static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group + * creation on client */ static void smc_tcp_listen_work(struct work_struct *); @@ -145,32 +148,33 @@ static int smc_release(struct socket *sock) rc = smc_close_active(smc); sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; - } - - sk->sk_prot->unhash(sk); - - if (smc->clcsock) { - if (smc->use_fallback && sk->sk_state == SMC_LISTEN) { + } else { + if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT) + sock_put(sk); /* passive closing */ + if (sk->sk_state == SMC_LISTEN) { /* wake up clcsock accept */ rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); } - mutex_lock(&smc->clcsock_release_lock); - sock_release(smc->clcsock); - smc->clcsock = NULL; - mutex_unlock(&smc->clcsock_release_lock); - } - if (smc->use_fallback) { - if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT) - sock_put(sk); /* passive closing */ sk->sk_state = SMC_CLOSED; sk->sk_state_change(sk); } + sk->sk_prot->unhash(sk); + + if (sk->sk_state == SMC_CLOSED) { + if (smc->clcsock) { + mutex_lock(&smc->clcsock_release_lock); + sock_release(smc->clcsock); + smc->clcsock = NULL; + mutex_unlock(&smc->clcsock_release_lock); + } + if (!smc->use_fallback) + smc_conn_free(&smc->conn); + } + /* detach socket */ sock_orphan(sk); sock->sk = NULL; - if (!smc->use_fallback && sk->sk_state == SMC_CLOSED) - smc_conn_free(&smc->conn); release_sock(sk); sock_put(sk); /* final sock_put */ @@ -291,7 +295,8 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, (1UL << SOCK_RXQ_OVFL) | \ (1UL << SOCK_WIFI_STATUS) | \ (1UL << SOCK_NOFCS) | \ - (1UL << SOCK_FILTER_LOCKED)) + (1UL << SOCK_FILTER_LOCKED) | \ + (1UL << SOCK_TSTAMP_NEW)) /* copy only relevant settings and flags of SOL_SOCKET level from smc to * clc socket (since smc is not called for these options from net/core) */ @@ -475,7 +480,12 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code, { if (local_contact == SMC_FIRST_CONTACT) smc_lgr_forget(smc->conn.lgr); - mutex_unlock(&smc_create_lgr_pending); + if (smc->conn.lgr->is_smcd) + /* there is only one lgr role for SMC-D; use server lock */ + mutex_unlock(&smc_server_lgr_pending); + else + mutex_unlock(&smc_client_lgr_pending); + smc_conn_free(&smc->conn); return reason_code; } @@ -560,7 +570,7 @@ static int smc_connect_rdma(struct smc_sock *smc, struct smc_link *link; int reason_code = 0; - mutex_lock(&smc_create_lgr_pending); + mutex_lock(&smc_client_lgr_pending); local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev, ibport, ntoh24(aclc->qpn), &aclc->lcl, NULL, 0); @@ -571,7 +581,8 @@ static int smc_connect_rdma(struct smc_sock *smc, reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */ else reason_code = SMC_CLC_DECL_INTERR; /* other error */ - return smc_connect_abort(smc, reason_code, 0); + mutex_unlock(&smc_client_lgr_pending); + return reason_code; } link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK]; @@ -615,7 +626,7 @@ static int smc_connect_rdma(struct smc_sock *smc, return smc_connect_abort(smc, reason_code, local_contact); } - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_client_lgr_pending); smc_copy_sock_settings_to_clc(smc); if (smc->sk.sk_state == SMC_INIT) @@ -632,11 +643,14 @@ static int smc_connect_ism(struct smc_sock *smc, int local_contact = SMC_FIRST_CONTACT; int rc = 0; - mutex_lock(&smc_create_lgr_pending); + /* there is only one lgr role for SMC-D; use server lock */ + mutex_lock(&smc_server_lgr_pending); local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0, NULL, ismdev, aclc->gid); - if (local_contact < 0) - return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0); + if (local_contact < 0) { + mutex_unlock(&smc_server_lgr_pending); + return SMC_CLC_DECL_MEM; + } /* Create send and receive buffers */ if (smc_buf_create(smc, true)) @@ -650,7 +664,7 @@ static int smc_connect_ism(struct smc_sock *smc, rc = smc_clc_send_confirm(smc); if (rc) return smc_connect_abort(smc, rc, local_contact); - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); smc_copy_sock_settings_to_clc(smc); if (smc->sk.sk_state == SMC_INIT) @@ -1249,7 +1263,7 @@ static void smc_listen_work(struct work_struct *work) return; } - mutex_lock(&smc_create_lgr_pending); + mutex_lock(&smc_server_lgr_pending); smc_close_init(new_smc); smc_rx_init(new_smc); smc_tx_init(new_smc); @@ -1271,7 +1285,7 @@ static void smc_listen_work(struct work_struct *work) &local_contact) || smc_listen_rdma_reg(new_smc, local_contact))) { /* SMC not supported, decline */ - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP, local_contact); return; @@ -1280,29 +1294,33 @@ static void smc_listen_work(struct work_struct *work) /* send SMC Accept CLC message */ rc = smc_clc_send_accept(new_smc, local_contact); if (rc) { - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); smc_listen_decline(new_smc, rc, local_contact); return; } + /* SMC-D does not need this lock any more */ + if (ism_supported) + mutex_unlock(&smc_server_lgr_pending); + /* receive SMC Confirm CLC message */ reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), SMC_CLC_CONFIRM, CLC_WAIT_TIME); if (reason_code) { - mutex_unlock(&smc_create_lgr_pending); + if (!ism_supported) + mutex_unlock(&smc_server_lgr_pending); smc_listen_decline(new_smc, reason_code, local_contact); return; } /* finish worker */ if (!ism_supported) { - if (smc_listen_rdma_finish(new_smc, &cclc, local_contact)) { - mutex_unlock(&smc_create_lgr_pending); + rc = smc_listen_rdma_finish(new_smc, &cclc, local_contact); + mutex_unlock(&smc_server_lgr_pending); + if (rc) return; - } } smc_conn_save_peer_info(new_smc, &cclc); - mutex_unlock(&smc_create_lgr_pending); smc_listen_out_connected(new_smc); } @@ -1505,6 +1523,11 @@ static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, smc = smc_sk(sk); lock_sock(sk); + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if ((sk->sk_state == SMC_INIT) || (sk->sk_state == SMC_LISTEN) || (sk->sk_state == SMC_CLOSED)) @@ -1840,7 +1863,11 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, smc = smc_sk(sk); lock_sock(sk); - + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if (sk->sk_state == SMC_INIT || sk->sk_state == SMC_LISTEN || sk->sk_state == SMC_CLOSED) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index db83332ac1c8..d0b0f4c865b4 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -21,13 +21,6 @@ /********************************** send *************************************/ -struct smc_cdc_tx_pend { - struct smc_connection *conn; /* socket connection */ - union smc_host_cursor cursor; /* tx sndbuf cursor sent */ - union smc_host_cursor p_cursor; /* rx RMBE cursor produced */ - u16 ctrl_seq; /* conn. tx sequence # */ -}; - /* handler for send/transmission completion of a CDC msg */ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, struct smc_link *link, @@ -61,12 +54,14 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend) { struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; int rc; rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, + wr_rdma_buf, (struct smc_wr_tx_pend_priv **)pend); if (!conn->alert_token_local) /* abnormal termination */ @@ -96,6 +91,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend) { + union smc_host_cursor cfed; struct smc_link *link; int rc; @@ -105,12 +101,12 @@ int smc_cdc_msg_send(struct smc_connection *conn, conn->tx_cdc_seq++; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; - smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, - &conn->local_tx_ctrl, conn); + smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed); rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); - if (!rc) - smc_curs_copy(&conn->rx_curs_confirmed, - &conn->local_tx_ctrl.cons, conn); + if (!rc) { + smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); + conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; + } return rc; } @@ -121,11 +117,14 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn) struct smc_wr_buf *wr_buf; int rc; - rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, NULL, &pend); if (rc) return rc; - return smc_cdc_msg_send(conn, wr_buf, pend); + spin_lock_bh(&conn->send_lock); + rc = smc_cdc_msg_send(conn, wr_buf, pend); + spin_unlock_bh(&conn->send_lock); + return rc; } int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) @@ -195,6 +194,7 @@ int smcd_cdc_msg_send(struct smc_connection *conn) if (rc) return rc; smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn); + conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; /* Calculate transmitted data and increment free send buffer space */ diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin, &conn->tx_curs_sent); @@ -271,26 +271,18 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, smp_mb__after_atomic(); smc->sk.sk_data_ready(&smc->sk); } else { - if (conn->local_rx_ctrl.prod_flags.write_blocked || - conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || - conn->local_rx_ctrl.prod_flags.urg_data_pending) { - if (conn->local_rx_ctrl.prod_flags.urg_data_pending) - conn->urg_state = SMC_URG_NOTYET; - /* force immediate tx of current consumer cursor, but - * under send_lock to guarantee arrival in seqno-order - */ - if (smc->sk.sk_state != SMC_INIT) - smc_tx_sndbuf_nonempty(conn); - } + if (conn->local_rx_ctrl.prod_flags.write_blocked) + smc->sk.sk_data_ready(&smc->sk); + if (conn->local_rx_ctrl.prod_flags.urg_data_pending) + conn->urg_state = SMC_URG_NOTYET; } - /* piggy backed tx info */ /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ - if (diff_cons && smc_tx_prepared_sends(conn)) { + if ((diff_cons && smc_tx_prepared_sends(conn)) || + conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || + conn->local_rx_ctrl.prod_flags.urg_data_pending) smc_tx_sndbuf_nonempty(conn); - /* trigger socket release if connection closed */ - smc_close_wake_tx_prepared(smc); - } + if (diff_cons && conn->urg_tx_pend && atomic_read(&conn->peer_rmbe_space) == conn->peer_rmbe_size) { /* urg data confirmed by peer, indicate we're ready for more */ diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index b5bfe38c7f9b..861dc24c588c 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -160,7 +160,9 @@ static inline void smcd_curs_copy(union smcd_cdc_cursor *tgt, #endif } -/* calculate cursor difference between old and new, where old <= new */ +/* calculate cursor difference between old and new, where old <= new and + * difference cannot exceed size + */ static inline int smc_curs_diff(unsigned int size, union smc_host_cursor *old, union smc_host_cursor *new) @@ -185,28 +187,51 @@ static inline int smc_curs_comp(unsigned int size, return smc_curs_diff(size, old, new); } +/* calculate cursor difference between old and new, where old <= new and + * difference may exceed size + */ +static inline int smc_curs_diff_large(unsigned int size, + union smc_host_cursor *old, + union smc_host_cursor *new) +{ + if (old->wrap < new->wrap) + return min_t(int, + (size - old->count) + new->count + + (new->wrap - old->wrap - 1) * size, + size); + + if (old->wrap > new->wrap) /* wrap has switched from 0xffff to 0x0000 */ + return min_t(int, + (size - old->count) + new->count + + (new->wrap + 0xffff - old->wrap) * size, + size); + + return max_t(int, 0, (new->count - old->count)); +} + static inline void smc_host_cursor_to_cdc(union smc_cdc_cursor *peer, union smc_host_cursor *local, + union smc_host_cursor *save, struct smc_connection *conn) { - union smc_host_cursor temp; - - smc_curs_copy(&temp, local, conn); - peer->count = htonl(temp.count); - peer->wrap = htons(temp.wrap); + smc_curs_copy(save, local, conn); + peer->count = htonl(save->count); + peer->wrap = htons(save->wrap); /* peer->reserved = htons(0); must be ensured by caller */ } static inline void smc_host_msg_to_cdc(struct smc_cdc_msg *peer, - struct smc_host_cdc_msg *local, - struct smc_connection *conn) + struct smc_connection *conn, + union smc_host_cursor *save) { + struct smc_host_cdc_msg *local = &conn->local_tx_ctrl; + peer->common.type = local->common.type; peer->len = local->len; peer->seqno = htons(local->seqno); peer->token = htonl(local->token); - smc_host_cursor_to_cdc(&peer->prod, &local->prod, conn); - smc_host_cursor_to_cdc(&peer->cons, &local->cons, conn); + smc_host_cursor_to_cdc(&peer->prod, &local->prod, save, conn); + smc_host_cursor_to_cdc(&peer->cons, &local->cons, save, conn); peer->prod_flags = local->prod_flags; peer->conn_state_flags = local->conn_state_flags; } @@ -245,17 +270,18 @@ static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local, } static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local, - struct smcd_cdc_msg *peer) + struct smcd_cdc_msg *peer, + struct smc_connection *conn) { union smc_host_cursor temp; temp.wrap = peer->prod.wrap; temp.count = peer->prod.count; - atomic64_set(&local->prod.acurs, atomic64_read(&temp.acurs)); + smc_curs_copy(&local->prod, &temp, conn); temp.wrap = peer->cons.wrap; temp.count = peer->cons.count; - atomic64_set(&local->cons.acurs, atomic64_read(&temp.acurs)); + smc_curs_copy(&local->cons, &temp, conn); local->prod_flags = peer->cons.prod_flags; local->conn_state_flags = peer->cons.conn_state_flags; } @@ -265,15 +291,21 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, struct smc_connection *conn) { if (conn->lgr->is_smcd) - smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer); + smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer, conn); else smcr_cdc_msg_to_host(local, peer, conn); } -struct smc_cdc_tx_pend; +struct smc_cdc_tx_pend { + struct smc_connection *conn; /* socket connection */ + union smc_host_cursor cursor; /* tx sndbuf cursor sent */ + union smc_host_cursor p_cursor; /* rx RMBE cursor produced */ + u16 ctrl_seq; /* conn. tx sequence # */ +}; int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend); void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 776e9dfc915d..d53fd588d1f5 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -378,7 +378,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) vec.iov_len = sizeof(struct smc_clc_msg_decline); len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(struct smc_clc_msg_decline)); - if (len < sizeof(struct smc_clc_msg_decline)) + if (len < 0 || len < sizeof(struct smc_clc_msg_decline)) len = -EPROTO; return len > 0 ? 0 : len; } diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index ea2b87f29469..2ad37e998509 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -345,14 +345,7 @@ static void smc_close_passive_work(struct work_struct *work) switch (sk->sk_state) { case SMC_INIT: - if (atomic_read(&conn->bytes_to_rcv) || - (rxflags->peer_done_writing && - !smc_cdc_rxed_any_close(conn))) { - sk->sk_state = SMC_APPCLOSEWAIT1; - } else { - sk->sk_state = SMC_CLOSED; - sock_put(sk); /* passive closing */ - } + sk->sk_state = SMC_APPCLOSEWAIT1; break; case SMC_ACTIVE: sk->sk_state = SMC_APPCLOSEWAIT1; @@ -405,8 +398,13 @@ wakeup: if (old_state != sk->sk_state) { sk->sk_state_change(sk); if ((sk->sk_state == SMC_CLOSED) && - (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) + (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { smc_conn_free(conn); + if (smc->clcsock) { + sock_release(smc->clcsock); + smc->clcsock = NULL; + } + } } release_sock(sk); sock_put(sk); /* sock_hold done by schedulers of close_work */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 35c1cdc93e1c..53a17cfa61af 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -118,7 +118,6 @@ static void __smc_lgr_unregister_conn(struct smc_connection *conn) rb_erase(&conn->alert_node, &lgr->conns_all); lgr->conns_num--; conn->alert_token_local = 0; - conn->lgr = NULL; sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */ } @@ -128,6 +127,8 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; + if (!lgr) + return; write_lock_bh(&lgr->conns_lock); if (conn->alert_token_local) { __smc_lgr_unregister_conn(conn); @@ -159,8 +160,6 @@ static void smc_lgr_free_work(struct work_struct *work) bool conns; spin_lock_bh(&smc_lgr_list.lock); - if (list_empty(&lgr->list)) - goto free; read_lock_bh(&lgr->conns_lock); conns = RB_EMPTY_ROOT(&lgr->conns_all); read_unlock_bh(&lgr->conns_lock); @@ -168,8 +167,8 @@ static void smc_lgr_free_work(struct work_struct *work) spin_unlock_bh(&smc_lgr_list.lock); return; } - list_del_init(&lgr->list); /* remove from smc_lgr_list */ -free: + if (!list_empty(&lgr->list)) + list_del_init(&lgr->list); /* remove from smc_lgr_list */ spin_unlock_bh(&smc_lgr_list.lock); if (!lgr->is_smcd && !lgr->terminating) { @@ -300,13 +299,13 @@ static void smc_buf_unuse(struct smc_connection *conn, conn->sndbuf_desc->used = 0; if (conn->rmb_desc) { if (!conn->rmb_desc->regerr) { - conn->rmb_desc->used = 0; if (!lgr->is_smcd) { /* unregister rmb with peer */ smc_llc_do_delete_rkey( &lgr->lnk[SMC_SINGLE_LINK], conn->rmb_desc); } + conn->rmb_desc->used = 0; } else { /* buf registration failed, reuse not possible */ write_lock_bh(&lgr->rmbs_lock); @@ -331,8 +330,9 @@ void smc_conn_free(struct smc_connection *conn) } else { smc_cdc_tx_dismiss_slots(conn); } - smc_lgr_unregister_conn(conn); /* unsets conn->lgr */ + smc_lgr_unregister_conn(conn); smc_buf_unuse(conn, lgr); /* allow buffer reuse */ + conn->lgr = NULL; if (!lgr->conns_num) smc_lgr_schedule_free_work(lgr); @@ -462,6 +462,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) sock_hold(&smc->sk); /* sock_put in close work */ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; __smc_lgr_unregister_conn(conn); + conn->lgr = NULL; write_unlock_bh(&lgr->conns_lock); if (!schedule_work(&conn->close_work)) sock_put(&smc->sk); @@ -628,6 +629,8 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact, local_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; smc_lgr_register_conn(conn); /* add smc conn to lgr */ + if (delayed_work_pending(&lgr->free_work)) + cancel_delayed_work(&lgr->free_work); write_unlock_bh(&lgr->conns_lock); break; } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index b00287989a3d..8806d2afa6ed 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -52,6 +52,24 @@ enum smc_wr_reg_state { FAILED /* ib_wr_reg_mr response: failure */ }; +struct smc_rdma_sge { /* sges for RDMA writes */ + struct ib_sge wr_tx_rdma_sge[SMC_IB_MAX_SEND_SGE]; +}; + +#define SMC_MAX_RDMA_WRITES 2 /* max. # of RDMA writes per + * message send + */ + +struct smc_rdma_sges { /* sges per message send */ + struct smc_rdma_sge tx_rdma_sge[SMC_MAX_RDMA_WRITES]; +}; + +struct smc_rdma_wr { /* work requests per message + * send + */ + struct ib_rdma_wr wr_tx_rdma[SMC_MAX_RDMA_WRITES]; +}; + struct smc_link { struct smc_ib_device *smcibdev; /* ib-device */ u8 ibport; /* port - values 1 | 2 */ @@ -64,6 +82,8 @@ struct smc_link { struct smc_wr_buf *wr_tx_bufs; /* WR send payload buffers */ struct ib_send_wr *wr_tx_ibs; /* WR send meta data */ struct ib_sge *wr_tx_sges; /* WR send gather meta data */ + struct smc_rdma_sges *wr_tx_rdma_sges;/*RDMA WRITE gather meta data*/ + struct smc_rdma_wr *wr_tx_rdmas; /* WR RDMA WRITE */ struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */ /* above four vectors have wr_tx_cnt elements and use the same index */ dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */ diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index dbf64a93d68a..371b4cf31fcd 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -38,6 +38,7 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) { struct smc_sock *smc = smc_sk(sk); + r->diag_family = sk->sk_family; if (!smc->clcsock) return; r->id.idiag_sport = htons(smc->clcsock->sk->sk_num); @@ -45,14 +46,12 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if; sock_diag_save_cookie(sk, r->id.idiag_cookie); if (sk->sk_protocol == SMCPROTO_SMC) { - r->diag_family = PF_INET; memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr; r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr; #if IS_ENABLED(CONFIG_IPV6) } else if (sk->sk_protocol == SMCPROTO_SMC6) { - r->diag_family = PF_INET6; memcpy(&r->id.idiag_src, &smc->clcsock->sk->sk_v6_rcv_saddr, sizeof(smc->clcsock->sk->sk_v6_rcv_saddr)); memcpy(&r->id.idiag_dst, &smc->clcsock->sk->sk_v6_daddr, diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index e519ef29c0ff..0b244be24fe0 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -257,12 +257,20 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, smcibdev = container_of(handler, struct smc_ib_device, event_handler); switch (ibevent->event) { - case IB_EVENT_PORT_ERR: case IB_EVENT_DEVICE_FATAL: + /* terminate all ports on device */ + for (port_idx = 0; port_idx < SMC_MAX_PORTS; port_idx++) + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); + break; + case IB_EVENT_PORT_ERR: case IB_EVENT_PORT_ACTIVE: + case IB_EVENT_GID_CHANGE: port_idx = ibevent->element.port_num - 1; - set_bit(port_idx, &smcibdev->port_event_mask); - schedule_work(&smcibdev->port_event_work); + if (port_idx < SMC_MAX_PORTS) { + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); + } break; default: break; @@ -289,18 +297,18 @@ int smc_ib_create_protection_domain(struct smc_link *lnk) static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) { - struct smc_ib_device *smcibdev = - (struct smc_ib_device *)ibevent->device; + struct smc_link *lnk = (struct smc_link *)priv; + struct smc_ib_device *smcibdev = lnk->smcibdev; u8 port_idx; switch (ibevent->event) { - case IB_EVENT_DEVICE_FATAL: - case IB_EVENT_GID_CHANGE: - case IB_EVENT_PORT_ERR: + case IB_EVENT_QP_FATAL: case IB_EVENT_QP_ACCESS_ERR: - port_idx = ibevent->element.port_num - 1; - set_bit(port_idx, &smcibdev->port_event_mask); - schedule_work(&smcibdev->port_event_work); + port_idx = ibevent->element.qp->port - 1; + if (port_idx < SMC_MAX_PORTS) { + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); + } break; default: break; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index a6d3623d06f4..4fd60c522802 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -166,7 +166,8 @@ static int smc_llc_add_pending_send(struct smc_link *link, { int rc; - rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, pend); + rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, NULL, + pend); if (rc < 0) return rc; BUILD_BUG_ON_MSG( diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 7cb3e4f07c10..632c3109dee5 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -27,7 +27,7 @@ static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { [SMC_PNETID_NAME] = { .type = NLA_NUL_STRING, - .len = SMC_MAX_PNETID_LEN - 1 + .len = SMC_MAX_PNETID_LEN }, [SMC_PNETID_ETHNAME] = { .type = NLA_NUL_STRING, diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index d8366ed51757..a3bff08ff8c8 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -24,10 +24,11 @@ #include "smc.h" #include "smc_wr.h" #include "smc_cdc.h" +#include "smc_close.h" #include "smc_ism.h" #include "smc_tx.h" -#define SMC_TX_WORK_DELAY HZ +#define SMC_TX_WORK_DELAY 0 #define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */ /***************************** sndbuf producer *******************************/ @@ -165,12 +166,11 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) conn->local_tx_ctrl.prod_flags.urg_data_pending = 1; if (!atomic_read(&conn->sndbuf_space) || conn->urg_tx_pend) { + if (send_done) + return send_done; rc = smc_tx_wait(smc, msg->msg_flags); - if (rc) { - if (send_done) - return send_done; + if (rc) goto out_err; - } continue; } @@ -267,27 +267,23 @@ int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len, /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, - int num_sges, struct ib_sge sges[]) + int num_sges, struct ib_rdma_wr *rdma_wr) { struct smc_link_group *lgr = conn->lgr; - struct ib_rdma_wr rdma_wr; struct smc_link *link; int rc; - memset(&rdma_wr, 0, sizeof(rdma_wr)); link = &lgr->lnk[SMC_SINGLE_LINK]; - rdma_wr.wr.wr_id = smc_wr_tx_get_next_wr_id(link); - rdma_wr.wr.sg_list = sges; - rdma_wr.wr.num_sge = num_sges; - rdma_wr.wr.opcode = IB_WR_RDMA_WRITE; - rdma_wr.remote_addr = + rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link); + rdma_wr->wr.num_sge = num_sges; + rdma_wr->remote_addr = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr + /* RMBE within RMB */ conn->tx_off + /* offset within RMBE */ peer_rmbe_offset; - rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; - rc = ib_post_send(link->roce_qp, &rdma_wr.wr, NULL); + rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; + rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL); if (rc) { conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; smc_lgr_terminate(lgr); @@ -314,24 +310,25 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn, /* SMC-R helper for smc_tx_rdma_writes() */ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, size_t src_off, size_t src_len, - size_t dst_off, size_t dst_len) + size_t dst_off, size_t dst_len, + struct smc_rdma_wr *wr_rdma_buf) { dma_addr_t dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); - struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; int src_len_sum = src_len, dst_len_sum = dst_len; - struct ib_sge sges[SMC_IB_MAX_SEND_SGE]; int sent_count = src_off; int srcchunk, dstchunk; int num_sges; int rc; for (dstchunk = 0; dstchunk < 2; dstchunk++) { + struct ib_sge *sge = + wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list; + num_sges = 0; for (srcchunk = 0; srcchunk < 2; srcchunk++) { - sges[srcchunk].addr = dma_addr + src_off; - sges[srcchunk].length = src_len; - sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; + sge[srcchunk].addr = dma_addr + src_off; + sge[srcchunk].length = src_len; num_sges++; src_off += src_len; @@ -344,7 +341,8 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, src_len = dst_len - src_len; /* remainder */ src_len_sum += src_len; } - rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges); + rc = smc_tx_rdma_write(conn, dst_off, num_sges, + &wr_rdma_buf->wr_tx_rdma[dstchunk]); if (rc) return rc; if (dst_len_sum == len) @@ -403,7 +401,8 @@ static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len, /* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit; * usable snd_wnd as max transmit */ -static int smc_tx_rdma_writes(struct smc_connection *conn) +static int smc_tx_rdma_writes(struct smc_connection *conn, + struct smc_rdma_wr *wr_rdma_buf) { size_t len, src_len, dst_off, dst_len; /* current chunk values */ union smc_host_cursor sent, prep, prod, cons; @@ -464,7 +463,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) dst_off, dst_len); else rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len, - dst_off, dst_len); + dst_off, dst_len, wr_rdma_buf); if (rc) return rc; @@ -485,31 +484,30 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) { struct smc_cdc_producer_flags *pflags; + struct smc_rdma_wr *wr_rdma_buf; struct smc_cdc_tx_pend *pend; struct smc_wr_buf *wr_buf; int rc; - spin_lock_bh(&conn->send_lock); - rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, &wr_rdma_buf, &pend); if (rc < 0) { if (rc == -EBUSY) { struct smc_sock *smc = container_of(conn, struct smc_sock, conn); - if (smc->sk.sk_err == ECONNABORTED) { - rc = sock_error(&smc->sk); - goto out_unlock; - } + if (smc->sk.sk_err == ECONNABORTED) + return sock_error(&smc->sk); rc = 0; if (conn->alert_token_local) /* connection healthy */ mod_delayed_work(system_wq, &conn->tx_work, SMC_TX_WORK_DELAY); } - goto out_unlock; + return rc; } + spin_lock_bh(&conn->send_lock); if (!conn->local_tx_ctrl.prod_flags.urg_data_present) { - rc = smc_tx_rdma_writes(conn); + rc = smc_tx_rdma_writes(conn, wr_rdma_buf); if (rc) { smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], (struct smc_wr_tx_pend_priv *)pend); @@ -536,7 +534,7 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn) spin_lock_bh(&conn->send_lock); if (!pflags->urg_data_present) - rc = smc_tx_rdma_writes(conn); + rc = smc_tx_rdma_writes(conn, NULL); if (!rc) rc = smcd_cdc_msg_send(conn); @@ -557,6 +555,12 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) else rc = smcr_tx_sndbuf_nonempty(conn); + if (!rc) { + /* trigger socket release if connection is closing */ + struct smc_sock *smc = container_of(conn, struct smc_sock, + conn); + smc_close_wake_tx_prepared(smc); + } return rc; } @@ -598,7 +602,8 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force) if (to_confirm > conn->rmbe_update_limit) { smc_curs_copy(&prod, &conn->local_rx_ctrl.prod, conn); sender_free = conn->rmb_desc->len - - smc_curs_diff(conn->rmb_desc->len, &prod, &cfed); + smc_curs_diff_large(conn->rmb_desc->len, + &cfed, &prod); } if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req || @@ -612,9 +617,6 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force) SMC_TX_WORK_DELAY); return; } - smc_curs_copy(&conn->rx_curs_confirmed, - &conn->local_tx_ctrl.cons, conn); - conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; } if (conn->local_rx_ctrl.prod_flags.write_blocked && !atomic_read(&conn->bytes_to_rcv)) diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index c2694750a6a8..253aa75dc2b6 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -160,6 +160,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) * @link: Pointer to smc_link used to later send the message. * @handler: Send completion handler function pointer. * @wr_buf: Out value returns pointer to message buffer. + * @wr_rdma_buf: Out value returns pointer to rdma work request. * @wr_pend_priv: Out value returns pointer serving as handler context. * * Return: 0 on success, or -errno on error. @@ -167,6 +168,7 @@ static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wr_rdma_buf, struct smc_wr_tx_pend_priv **wr_pend_priv) { struct smc_wr_tx_pend *wr_pend; @@ -204,6 +206,8 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, wr_ib = &link->wr_tx_ibs[idx]; wr_ib->wr_id = wr_id; *wr_buf = &link->wr_tx_bufs[idx]; + if (wr_rdma_buf) + *wr_rdma_buf = &link->wr_tx_rdmas[idx]; *wr_pend_priv = &wr_pend->priv; return 0; } @@ -218,10 +222,10 @@ int smc_wr_tx_put_slot(struct smc_link *link, u32 idx = pend->idx; /* clear the full struct smc_wr_tx_pend including .priv */ - memset(&link->wr_tx_pends[pend->idx], 0, - sizeof(link->wr_tx_pends[pend->idx])); - memset(&link->wr_tx_bufs[pend->idx], 0, - sizeof(link->wr_tx_bufs[pend->idx])); + memset(&link->wr_tx_pends[idx], 0, + sizeof(link->wr_tx_pends[idx])); + memset(&link->wr_tx_bufs[idx], 0, + sizeof(link->wr_tx_bufs[idx])); test_and_clear_bit(idx, link->wr_tx_mask); return 1; } @@ -465,12 +469,26 @@ static void smc_wr_init_sge(struct smc_link *lnk) lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE; lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE; lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey; + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[0].lkey = + lnk->roce_pd->local_dma_lkey; + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[1].lkey = + lnk->roce_pd->local_dma_lkey; + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[0].lkey = + lnk->roce_pd->local_dma_lkey; + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[1].lkey = + lnk->roce_pd->local_dma_lkey; lnk->wr_tx_ibs[i].next = NULL; lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i]; lnk->wr_tx_ibs[i].num_sge = 1; lnk->wr_tx_ibs[i].opcode = IB_WR_SEND; lnk->wr_tx_ibs[i].send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED; + lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE; + lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE; + lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list = + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge; + lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list = + lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge; } for (i = 0; i < lnk->wr_rx_cnt; i++) { lnk->wr_rx_sges[i].addr = @@ -521,8 +539,12 @@ void smc_wr_free_link_mem(struct smc_link *lnk) lnk->wr_tx_mask = NULL; kfree(lnk->wr_tx_sges); lnk->wr_tx_sges = NULL; + kfree(lnk->wr_tx_rdma_sges); + lnk->wr_tx_rdma_sges = NULL; kfree(lnk->wr_rx_sges); lnk->wr_rx_sges = NULL; + kfree(lnk->wr_tx_rdmas); + lnk->wr_tx_rdmas = NULL; kfree(lnk->wr_rx_ibs); lnk->wr_rx_ibs = NULL; kfree(lnk->wr_tx_ibs); @@ -552,10 +574,20 @@ int smc_wr_alloc_link_mem(struct smc_link *link) GFP_KERNEL); if (!link->wr_rx_ibs) goto no_mem_wr_tx_ibs; + link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT, + sizeof(link->wr_tx_rdmas[0]), + GFP_KERNEL); + if (!link->wr_tx_rdmas) + goto no_mem_wr_rx_ibs; + link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT, + sizeof(link->wr_tx_rdma_sges[0]), + GFP_KERNEL); + if (!link->wr_tx_rdma_sges) + goto no_mem_wr_tx_rdmas; link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]), GFP_KERNEL); if (!link->wr_tx_sges) - goto no_mem_wr_rx_ibs; + goto no_mem_wr_tx_rdma_sges; link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3, sizeof(link->wr_rx_sges[0]), GFP_KERNEL); @@ -579,6 +611,10 @@ no_mem_wr_rx_sges: kfree(link->wr_rx_sges); no_mem_wr_tx_sges: kfree(link->wr_tx_sges); +no_mem_wr_tx_rdma_sges: + kfree(link->wr_tx_rdma_sges); +no_mem_wr_tx_rdmas: + kfree(link->wr_tx_rdmas); no_mem_wr_rx_ibs: kfree(link->wr_rx_ibs); no_mem_wr_tx_ibs: diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 1d85bb14fd6f..09bf32fd3959 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -85,6 +85,7 @@ void smc_wr_add_dev(struct smc_ib_device *smcibdev); int smc_wr_tx_get_free_slot(struct smc_link *link, smc_wr_tx_handler handler, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wrs, struct smc_wr_tx_pend_priv **wr_pend_priv); int smc_wr_tx_put_slot(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); diff --git a/net/socket.c b/net/socket.c index e89884e2197b..643a1648fcc2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -669,7 +669,7 @@ static bool skb_is_err_queue(const struct sk_buff *skb) * before the software timestamp is received, a hardware TX timestamp may be * returned only if there is no software TX timestamp. Ignore false software * timestamps, which may be made in the __sock_recv_timestamp() call when the - * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a + * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a * hardware timestamp. */ static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp) @@ -705,7 +705,9 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); - struct scm_timestamping tss; + int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); + struct scm_timestamping_internal tss; + int empty = 1, false_tstamp = 0; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); @@ -719,34 +721,54 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, if (need_software_tstamp) { if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { - struct timeval tv; - skb_get_timestamp(skb, &tv); - put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, - sizeof(tv), &tv); + if (new_tstamp) { + struct __kernel_sock_timeval tv; + + skb_get_new_timestamp(skb, &tv); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW, + sizeof(tv), &tv); + } else { + struct __kernel_old_timeval tv; + + skb_get_timestamp(skb, &tv); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, + sizeof(tv), &tv); + } } else { - struct timespec ts; - skb_get_timestampns(skb, &ts); - put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, - sizeof(ts), &ts); + if (new_tstamp) { + struct __kernel_timespec ts; + + skb_get_new_timestampns(skb, &ts); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW, + sizeof(ts), &ts); + } else { + struct timespec ts; + + skb_get_timestampns(skb, &ts); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD, + sizeof(ts), &ts); + } } } memset(&tss, 0, sizeof(tss)); if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) && - ktime_to_timespec_cond(skb->tstamp, tss.ts + 0)) + ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0)) empty = 0; if (shhwtstamps && (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && !skb_is_swtx_tstamp(skb, false_tstamp) && - ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) { + ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) { empty = 0; if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) && !skb_is_err_queue(skb)) put_ts_pktinfo(msg, skb); } if (!empty) { - put_cmsg(msg, SOL_SOCKET, - SCM_TIMESTAMPING, sizeof(tss), &tss); + if (sock_flag(sk, SOCK_TSTAMP_NEW)) + put_cmsg_scm_timestamping64(msg, &tss); + else + put_cmsg_scm_timestamping(msg, &tss); if (skb_is_err_queue(skb) && skb->len && SKB_EXT_ERR(skb)->opt_stats) @@ -941,8 +963,7 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) EXPORT_SYMBOL(dlci_ioctl_set); static long sock_do_ioctl(struct net *net, struct socket *sock, - unsigned int cmd, unsigned long arg, - unsigned int ifreq_size) + unsigned int cmd, unsigned long arg) { int err; void __user *argp = (void __user *)arg; @@ -968,11 +989,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, } else { struct ifreq ifr; bool need_copyout; - if (copy_from_user(&ifr, argp, ifreq_size)) + if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) return -EFAULT; err = dev_ioctl(net, cmd, &ifr, &need_copyout); if (!err && need_copyout) - if (copy_to_user(argp, &ifr, ifreq_size)) + if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) return -EFAULT; } return err; @@ -1071,8 +1092,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) err = open_related_ns(&net->ns, get_net_ns); break; default: - err = sock_do_ioctl(net, sock, cmd, arg, - sizeof(struct ifreq)); + err = sock_do_ioctl(net, sock, cmd, arg); break; } return err; @@ -2780,8 +2800,7 @@ static int do_siocgstamp(struct net *net, struct socket *sock, int err; set_fs(KERNEL_DS); - err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv, - sizeof(struct compat_ifreq)); + err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); set_fs(old_fs); if (!err) err = compat_put_timeval(&ktv, up); @@ -2797,8 +2816,7 @@ static int do_siocgstampns(struct net *net, struct socket *sock, int err; set_fs(KERNEL_DS); - err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts, - sizeof(struct compat_ifreq)); + err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); set_fs(old_fs); if (!err) err = compat_put_timespec(&kts, up); @@ -2994,6 +3012,54 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, return dev_ioctl(net, cmd, &ifreq, NULL); } +static int compat_ifreq_ioctl(struct net *net, struct socket *sock, + unsigned int cmd, + struct compat_ifreq __user *uifr32) +{ + struct ifreq __user *uifr; + int err; + + /* Handle the fact that while struct ifreq has the same *layout* on + * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, + * which are handled elsewhere, it still has different *size* due to + * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, + * resulting in struct ifreq being 32 and 40 bytes respectively). + * As a result, if the struct happens to be at the end of a page and + * the next page isn't readable/writable, we get a fault. To prevent + * that, copy back and forth to the full size. + */ + + uifr = compat_alloc_user_space(sizeof(*uifr)); + if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) + return -EFAULT; + + err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); + + if (!err) { + switch (cmd) { + case SIOCGIFFLAGS: + case SIOCGIFMETRIC: + case SIOCGIFMTU: + case SIOCGIFMEM: + case SIOCGIFHWADDR: + case SIOCGIFINDEX: + case SIOCGIFADDR: + case SIOCGIFBRDADDR: + case SIOCGIFDSTADDR: + case SIOCGIFNETMASK: + case SIOCGIFPFLAGS: + case SIOCGIFTXQLEN: + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCGIFNAME: + if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) + err = -EFAULT; + break; + } + } + return err; +} + static int compat_sioc_ifmap(struct net *net, unsigned int cmd, struct compat_ifreq __user *uifr32) { @@ -3109,8 +3175,7 @@ static int routing_ioctl(struct net *net, struct socket *sock, } set_fs(KERNEL_DS); - ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r, - sizeof(struct compat_ifreq)); + ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); set_fs(old_fs); out: @@ -3210,21 +3275,22 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCSIFTXQLEN: case SIOCBRADDIF: case SIOCBRDELIF: + case SIOCGIFNAME: case SIOCSIFNAME: case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - case SIOCSARP: - case SIOCGARP: - case SIOCDARP: - case SIOCATMARK: case SIOCBONDENSLAVE: case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: case SIOCBONDCHANGEACTIVE: - case SIOCGIFNAME: - return sock_do_ioctl(net, sock, cmd, arg, - sizeof(struct compat_ifreq)); + return compat_ifreq_ioctl(net, sock, cmd, argp); + + case SIOCSARP: + case SIOCGARP: + case SIOCDARP: + case SIOCATMARK: + return sock_do_ioctl(net, sock, cmd, arg); } return -ENOIOCTLCMD; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index cf51b8f9b15f..1f200119268c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -537,6 +537,99 @@ void svc_rdma_sync_reply_hdr(struct svcxprt_rdma *rdma, DMA_TO_DEVICE); } +/* If the xdr_buf has more elements than the device can + * transmit in a single RDMA Send, then the reply will + * have to be copied into a bounce buffer. + */ +static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma, + struct xdr_buf *xdr, + __be32 *wr_lst) +{ + int elements; + + /* xdr->head */ + elements = 1; + + /* xdr->pages */ + if (!wr_lst) { + unsigned int remaining; + unsigned long pageoff; + + pageoff = xdr->page_base & ~PAGE_MASK; + remaining = xdr->page_len; + while (remaining) { + ++elements; + remaining -= min_t(u32, PAGE_SIZE - pageoff, + remaining); + pageoff = 0; + } + } + + /* xdr->tail */ + if (xdr->tail[0].iov_len) + ++elements; + + /* assume 1 SGE is needed for the transport header */ + return elements >= rdma->sc_max_send_sges; +} + +/* The device is not capable of sending the reply directly. + * Assemble the elements of @xdr into the transport header + * buffer. + */ +static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt, + struct xdr_buf *xdr, __be32 *wr_lst) +{ + unsigned char *dst, *tailbase; + unsigned int taillen; + + dst = ctxt->sc_xprt_buf; + dst += ctxt->sc_sges[0].length; + + memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len); + dst += xdr->head[0].iov_len; + + tailbase = xdr->tail[0].iov_base; + taillen = xdr->tail[0].iov_len; + if (wr_lst) { + u32 xdrpad; + + xdrpad = xdr_padsize(xdr->page_len); + if (taillen && xdrpad) { + tailbase += xdrpad; + taillen -= xdrpad; + } + } else { + unsigned int len, remaining; + unsigned long pageoff; + struct page **ppages; + + ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT); + pageoff = xdr->page_base & ~PAGE_MASK; + remaining = xdr->page_len; + while (remaining) { + len = min_t(u32, PAGE_SIZE - pageoff, remaining); + + memcpy(dst, page_address(*ppages), len); + remaining -= len; + dst += len; + pageoff = 0; + } + } + + if (taillen) + memcpy(dst, tailbase, taillen); + + ctxt->sc_sges[0].length += xdr->len; + ib_dma_sync_single_for_device(rdma->sc_pd->device, + ctxt->sc_sges[0].addr, + ctxt->sc_sges[0].length, + DMA_TO_DEVICE); + + return 0; +} + /* svc_rdma_map_reply_msg - Map the buffer holding RPC message * @rdma: controlling transport * @ctxt: send_ctxt for the Send WR @@ -559,8 +652,10 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, u32 xdr_pad; int ret; - if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) - return -EIO; + if (svc_rdma_pull_up_needed(rdma, xdr, wr_lst)) + return svc_rdma_pull_up_reply_msg(rdma, ctxt, xdr, wr_lst); + + ++ctxt->sc_cur_sge_no; ret = svc_rdma_dma_map_buf(rdma, ctxt, xdr->head[0].iov_base, xdr->head[0].iov_len); @@ -591,8 +686,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, while (remaining) { len = min_t(u32, PAGE_SIZE - page_off, remaining); - if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) - return -EIO; + ++ctxt->sc_cur_sge_no; ret = svc_rdma_dma_map_page(rdma, ctxt, *ppages++, page_off, len); if (ret < 0) @@ -606,8 +700,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, len = xdr->tail[0].iov_len; tail: if (len) { - if (++ctxt->sc_cur_sge_no >= rdma->sc_max_send_sges) - return -EIO; + ++ctxt->sc_cur_sge_no; ret = svc_rdma_dma_map_buf(rdma, ctxt, base, len); if (ret < 0) return ret; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 924c17d46903..57f86c63a463 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -419,12 +419,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) /* Transport header, head iovec, tail iovec */ newxprt->sc_max_send_sges = 3; /* Add one SGE per page list entry */ - newxprt->sc_max_send_sges += svcrdma_max_req_size / PAGE_SIZE; - if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) { - pr_err("svcrdma: too few Send SGEs available (%d needed)\n", - newxprt->sc_max_send_sges); - goto errout; - } + newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1; + if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge) + newxprt->sc_max_send_sges = dev->attrs.max_send_sge; newxprt->sc_max_req_size = svcrdma_max_req_size; newxprt->sc_max_requests = svcrdma_max_requests; newxprt->sc_max_bc_requests = svcrdma_max_bc_requests; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index cd78253de31d..7e1357db33d7 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -592,26 +592,6 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev, } EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers); -bool switchdev_port_same_parent_id(struct net_device *a, - struct net_device *b) -{ - struct switchdev_attr a_attr = { - .orig_dev = a, - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - }; - struct switchdev_attr b_attr = { - .orig_dev = b, - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - }; - - if (switchdev_port_attr_get(a, &a_attr) || - switchdev_port_attr_get(b, &b_attr)) - return false; - - return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); -} -EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); - static int __switchdev_handle_port_obj_add(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, bool (*check_cb)(const struct net_device *dev), diff --git a/net/tipc/link.c b/net/tipc/link.c index ac306d17f8ad..341ecd796aa4 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1145,7 +1145,7 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, default: pr_warn("Dropping received illegal msg type\n"); kfree_skb(skb); - return false; + return true; }; } @@ -1425,6 +1425,10 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, l->rcv_unacked = 0; } else { /* RESET_MSG or ACTIVATE_MSG */ + if (mtyp == ACTIVATE_MSG) { + msg_set_dest_session_valid(hdr, 1); + msg_set_dest_session(hdr, l->peer_session); + } msg_set_max_pkt(hdr, l->advertised_mtu); strcpy(data, l->if_name); msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME); @@ -1642,6 +1646,17 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); break; } + + /* If this endpoint was re-created while peer was ESTABLISHING + * it doesn't know current session number. Force re-synch. + */ + if (mtyp == ACTIVATE_MSG && msg_dest_session_valid(hdr) && + l->session != msg_dest_session(hdr)) { + if (less(l->session, msg_dest_session(hdr))) + l->session = msg_dest_session(hdr) + 1; + break; + } + /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ if (mtyp == RESET_MSG || !link_is_up(l)) rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index a0924956bb61..d7e4b8b93f9d 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -360,6 +360,28 @@ static inline void msg_set_bcast_ack(struct tipc_msg *m, u16 n) msg_set_bits(m, 1, 0, 0xffff, n); } +/* Note: reusing bits in word 1 for ACTIVATE_MSG only, to re-synch + * link peer session number + */ +static inline bool msg_dest_session_valid(struct tipc_msg *m) +{ + return msg_bits(m, 1, 16, 0x1); +} + +static inline void msg_set_dest_session_valid(struct tipc_msg *m, bool valid) +{ + msg_set_bits(m, 1, 16, 0x1, valid); +} + +static inline u16 msg_dest_session(struct tipc_msg *m) +{ + return msg_bits(m, 1, 0, 0xffff); +} + +static inline void msg_set_dest_session(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 1, 0, 0xffff, n); +} /* * Word 2 diff --git a/net/tipc/node.c b/net/tipc/node.c index db2a6c3e0be9..2dc4919ab23c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -830,15 +830,16 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_node_write_lock(n); if (!tipc_link_is_establishing(l)) { __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); - if (delete) { - kfree(l); - le->link = NULL; - n->link_cnt--; - } } else { /* Defuse pending tipc_node_link_up() */ + tipc_link_reset(l); tipc_link_fsm_evt(l, LINK_RESET_EVT); } + if (delete) { + kfree(l); + le->link = NULL; + n->link_cnt--; + } trace_tipc_node_link_down(n, true, "node link down or deleted!"); tipc_node_write_unlock(n); if (delete) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index d753e362d2d9..7ee9008b2187 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -257,7 +257,8 @@ static int tls_push_record(struct sock *sk, tls_fill_prepend(ctx, skb_frag_address(frag), record->len - ctx->tx.prepend_size, - record_type); + record_type, + ctx->crypto_send.info.version); /* HW doesn't care about the data in the tag, because it fills it. */ dummy_tag_frag.page = skb_frag_page(frag); @@ -270,7 +271,7 @@ static int tls_push_record(struct sock *sk, spin_unlock_irq(&offload_ctx->lock); offload_ctx->open_record = NULL; set_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags); - tls_advance_record_sn(sk, &ctx->tx); + tls_advance_record_sn(sk, &ctx->tx, ctx->crypto_send.info.version); for (i = 0; i < record->num_frags; i++) { frag = &record->frags[i]; diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 450a6dbc5a88..54c3a758f2a7 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -73,7 +73,8 @@ static int tls_enc_record(struct aead_request *aead_req, len -= TLS_CIPHER_AES_GCM_128_IV_SIZE; tls_make_aad(aad, len - TLS_CIPHER_AES_GCM_128_TAG_SIZE, - (char *)&rcd_sn, sizeof(rcd_sn), buf[0]); + (char *)&rcd_sn, sizeof(rcd_sn), buf[0], + TLS_1_2_VERSION); memcpy(iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, buf + TLS_HEADER_SIZE, TLS_CIPHER_AES_GCM_128_IV_SIZE); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index d36d095cbcf0..d1c2fd9a3f63 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -372,6 +372,30 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval, rc = -EFAULT; break; } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 * + crypto_info_aes_gcm_256 = + container_of(crypto_info, + struct tls12_crypto_info_aes_gcm_256, + info); + + if (len != sizeof(*crypto_info_aes_gcm_256)) { + rc = -EINVAL; + goto out; + } + lock_sock(sk); + memcpy(crypto_info_aes_gcm_256->iv, + ctx->tx.iv + TLS_CIPHER_AES_GCM_256_SALT_SIZE, + TLS_CIPHER_AES_GCM_256_IV_SIZE); + memcpy(crypto_info_aes_gcm_256->rec_seq, ctx->tx.rec_seq, + TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE); + release_sock(sk); + if (copy_to_user(optval, + crypto_info_aes_gcm_256, + sizeof(*crypto_info_aes_gcm_256))) + rc = -EFAULT; + break; + } default: rc = -EINVAL; } @@ -412,6 +436,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, { struct tls_crypto_info *crypto_info; struct tls_context *ctx = tls_get_ctx(sk); + size_t optsize; int rc = 0; int conf; @@ -438,14 +463,19 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, } /* check version */ - if (crypto_info->version != TLS_1_2_VERSION) { + if (crypto_info->version != TLS_1_2_VERSION && + crypto_info->version != TLS_1_3_VERSION) { rc = -ENOTSUPP; goto err_crypto_info; } switch (crypto_info->cipher_type) { - case TLS_CIPHER_AES_GCM_128: { - if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) { + case TLS_CIPHER_AES_GCM_128: + case TLS_CIPHER_AES_GCM_256: { + optsize = crypto_info->cipher_type == TLS_CIPHER_AES_GCM_128 ? + sizeof(struct tls12_crypto_info_aes_gcm_128) : + sizeof(struct tls12_crypto_info_aes_gcm_256); + if (optlen != optsize) { rc = -EINVAL; goto err_crypto_info; } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 86b9527c4826..ae4784734547 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -120,6 +120,34 @@ static int skb_nsg(struct sk_buff *skb, int offset, int len) return __skb_nsg(skb, offset, len, 0); } +static int padding_length(struct tls_sw_context_rx *ctx, + struct tls_context *tls_ctx, struct sk_buff *skb) +{ + struct strp_msg *rxm = strp_msg(skb); + int sub = 0; + + /* Determine zero-padding length */ + if (tls_ctx->crypto_recv.info.version == TLS_1_3_VERSION) { + char content_type = 0; + int err; + int back = 17; + + while (content_type == 0) { + if (back > rxm->full_len) + return -EBADMSG; + err = skb_copy_bits(skb, + rxm->offset + rxm->full_len - back, + &content_type, 1); + if (content_type) + break; + sub++; + back++; + } + ctx->control = content_type; + } + return sub; +} + static void tls_decrypt_done(struct crypto_async_request *req, int err) { struct aead_request *aead_req = (struct aead_request *)req; @@ -142,7 +170,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) tls_err_abort(skb->sk, err); } else { struct strp_msg *rxm = strp_msg(skb); - + rxm->full_len -= padding_length(ctx, tls_ctx, skb); rxm->offset += tls_ctx->rx.prepend_size; rxm->full_len -= tls_ctx->rx.overhead_size; } @@ -185,7 +213,7 @@ static int tls_do_decryption(struct sock *sk, int ret; aead_request_set_tfm(aead_req, ctx->aead_recv); - aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE); + aead_request_set_ad(aead_req, tls_ctx->rx.aad_size); aead_request_set_crypt(aead_req, sgin, sgout, data_len + tls_ctx->rx.tag_size, (u8 *)iv_recv); @@ -289,12 +317,12 @@ static struct tls_rec *tls_get_rec(struct sock *sk) sg_init_table(rec->sg_aead_in, 2); sg_set_buf(&rec->sg_aead_in[0], rec->aad_space, - sizeof(rec->aad_space)); + tls_ctx->tx.aad_size); sg_unmark_end(&rec->sg_aead_in[1]); sg_init_table(rec->sg_aead_out, 2); sg_set_buf(&rec->sg_aead_out[0], rec->aad_space, - sizeof(rec->aad_space)); + tls_ctx->tx.aad_size); sg_unmark_end(&rec->sg_aead_out[1]); return rec; @@ -447,16 +475,20 @@ static int tls_do_encryption(struct sock *sk, struct scatterlist *sge = sk_msg_elem(msg_en, start); int rc; + memcpy(rec->iv_data, tls_ctx->tx.iv, sizeof(rec->iv_data)); + xor_iv_with_seq(tls_ctx->crypto_send.info.version, rec->iv_data, + tls_ctx->tx.rec_seq); + sge->offset += tls_ctx->tx.prepend_size; sge->length -= tls_ctx->tx.prepend_size; msg_en->sg.curr = start; aead_request_set_tfm(aead_req, ctx->aead_send); - aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE); + aead_request_set_ad(aead_req, tls_ctx->tx.aad_size); aead_request_set_crypt(aead_req, rec->sg_aead_in, rec->sg_aead_out, - data_len, tls_ctx->tx.iv); + data_len, rec->iv_data); aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, tls_encrypt_done, sk); @@ -481,7 +513,8 @@ static int tls_do_encryption(struct sock *sk, /* Unhook the record from context if encryption is not failure */ ctx->open_rec = NULL; - tls_advance_record_sn(sk, &tls_ctx->tx); + tls_advance_record_sn(sk, &tls_ctx->tx, + tls_ctx->crypto_send.info.version); return rc; } @@ -638,7 +671,17 @@ static int tls_push_record(struct sock *sk, int flags, i = msg_pl->sg.end; sk_msg_iter_var_prev(i); - sg_mark_end(sk_msg_elem(msg_pl, i)); + + rec->content_type = record_type; + if (tls_ctx->crypto_send.info.version == TLS_1_3_VERSION) { + /* Add content type to end of message. No padding added */ + sg_set_buf(&rec->sg_content_type, &rec->content_type, 1); + sg_mark_end(&rec->sg_content_type); + sg_chain(msg_pl->sg.data, msg_pl->sg.end + 1, + &rec->sg_content_type); + } else { + sg_mark_end(sk_msg_elem(msg_pl, i)); + } i = msg_pl->sg.start; sg_chain(rec->sg_aead_in, 2, rec->inplace_crypto ? @@ -651,18 +694,22 @@ static int tls_push_record(struct sock *sk, int flags, i = msg_en->sg.start; sg_chain(rec->sg_aead_out, 2, &msg_en->sg.data[i]); - tls_make_aad(rec->aad_space, msg_pl->sg.size, + tls_make_aad(rec->aad_space, msg_pl->sg.size + tls_ctx->tx.tail_size, tls_ctx->tx.rec_seq, tls_ctx->tx.rec_seq_size, - record_type); + record_type, + tls_ctx->crypto_send.info.version); tls_fill_prepend(tls_ctx, page_address(sg_page(&msg_en->sg.data[i])) + - msg_en->sg.data[i].offset, msg_pl->sg.size, - record_type); + msg_en->sg.data[i].offset, + msg_pl->sg.size + tls_ctx->tx.tail_size, + record_type, + tls_ctx->crypto_send.info.version); tls_ctx->pending_open_record_frags = false; - rc = tls_do_encryption(sk, tls_ctx, ctx, req, msg_pl->sg.size, i); + rc = tls_do_encryption(sk, tls_ctx, ctx, req, + msg_pl->sg.size + tls_ctx->tx.tail_size, i); if (rc < 0) { if (rc != -EINPROGRESS) { tls_err_abort(sk, EBADMSG); @@ -671,6 +718,7 @@ static int tls_push_record(struct sock *sk, int flags, tls_merge_open_record(sk, rec, tmp, orig_end); } } + ctx->async_capable = 1; return rc; } else if (split) { msg_pl = &tmp->msg_plaintext; @@ -812,8 +860,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); - struct crypto_tfm *tfm = crypto_aead_tfm(ctx->aead_send); - bool async_capable = tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; + bool async_capable = ctx->async_capable; unsigned char record_type = TLS_RECORD_TYPE_DATA; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool eor = !(msg->msg_flags & MSG_MORE); @@ -1290,7 +1337,8 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, u8 *aad, *iv, *mem = NULL; struct scatterlist *sgin = NULL; struct scatterlist *sgout = NULL; - const int data_len = rxm->full_len - tls_ctx->rx.overhead_size; + const int data_len = rxm->full_len - tls_ctx->rx.overhead_size + + tls_ctx->rx.tail_size; if (*zc && (out_iov || out_sg)) { if (out_iov) @@ -1315,7 +1363,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv); mem_size = aead_size + (nsg * sizeof(struct scatterlist)); - mem_size = mem_size + TLS_AAD_SPACE_SIZE; + mem_size = mem_size + tls_ctx->rx.aad_size; mem_size = mem_size + crypto_aead_ivsize(ctx->aead_recv); /* Allocate a single block of memory which contains @@ -1331,7 +1379,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, sgin = (struct scatterlist *)(mem + aead_size); sgout = sgin + n_sgin; aad = (u8 *)(sgout + n_sgout); - iv = aad + TLS_AAD_SPACE_SIZE; + iv = aad + tls_ctx->rx.aad_size; /* Prepare IV */ err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE, @@ -1341,16 +1389,24 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, kfree(mem); return err; } - memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + if (tls_ctx->crypto_recv.info.version == TLS_1_3_VERSION) + memcpy(iv, tls_ctx->rx.iv, crypto_aead_ivsize(ctx->aead_recv)); + else + memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + + xor_iv_with_seq(tls_ctx->crypto_recv.info.version, iv, + tls_ctx->rx.rec_seq); /* Prepare AAD */ - tls_make_aad(aad, rxm->full_len - tls_ctx->rx.overhead_size, + tls_make_aad(aad, rxm->full_len - tls_ctx->rx.overhead_size + + tls_ctx->rx.tail_size, tls_ctx->rx.rec_seq, tls_ctx->rx.rec_seq_size, - ctx->control); + ctx->control, + tls_ctx->crypto_recv.info.version); /* Prepare sgin */ sg_init_table(sgin, n_sgin); - sg_set_buf(&sgin[0], aad, TLS_AAD_SPACE_SIZE); + sg_set_buf(&sgin[0], aad, tls_ctx->rx.aad_size); err = skb_to_sgvec(skb, &sgin[1], rxm->offset + tls_ctx->rx.prepend_size, rxm->full_len - tls_ctx->rx.prepend_size); @@ -1362,7 +1418,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, if (n_sgout) { if (out_iov) { sg_init_table(sgout, n_sgout); - sg_set_buf(&sgout[0], aad, TLS_AAD_SPACE_SIZE); + sg_set_buf(&sgout[0], aad, tls_ctx->rx.aad_size); *chunk = 0; err = tls_setup_from_iter(sk, out_iov, data_len, @@ -1403,6 +1459,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + int version = tls_ctx->crypto_recv.info.version; struct strp_msg *rxm = strp_msg(skb); int err = 0; @@ -1415,20 +1472,23 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, async); if (err < 0) { if (err == -EINPROGRESS) - tls_advance_record_sn(sk, &tls_ctx->rx); + tls_advance_record_sn(sk, &tls_ctx->rx, + version); return err; } + + rxm->full_len -= padding_length(ctx, tls_ctx, skb); + + rxm->offset += tls_ctx->rx.prepend_size; + rxm->full_len -= tls_ctx->rx.overhead_size; + tls_advance_record_sn(sk, &tls_ctx->rx, version); + ctx->decrypted = true; + ctx->saved_data_ready(sk); } else { *zc = false; } - rxm->offset += tls_ctx->rx.prepend_size; - rxm->full_len -= tls_ctx->rx.overhead_size; - tls_advance_record_sn(sk, &tls_ctx->rx); - ctx->decrypted = true; - ctx->saved_data_ready(sk); - return err; } @@ -1585,10 +1645,10 @@ int tls_sw_recvmsg(struct sock *sk, do { bool retain_skb = false; - bool async = false; bool zc = false; int to_decrypt; int chunk = 0; + bool async; skb = tls_wait_data(sk, psock, flags, timeo, &err); if (!skb) { @@ -1607,6 +1667,29 @@ int tls_sw_recvmsg(struct sock *sk, rxm = strp_msg(skb); + to_decrypt = rxm->full_len - tls_ctx->rx.overhead_size; + + if (to_decrypt <= len && !is_kvec && !is_peek && + ctx->control == TLS_RECORD_TYPE_DATA && + tls_ctx->crypto_recv.info.version != TLS_1_3_VERSION) + zc = true; + + /* Do not use async mode if record is non-data */ + if (ctx->control == TLS_RECORD_TYPE_DATA) + async = ctx->async_capable; + else + async = false; + + err = decrypt_skb_update(sk, skb, &msg->msg_iter, + &chunk, &zc, async); + if (err < 0 && err != -EINPROGRESS) { + tls_err_abort(sk, EBADMSG); + goto recv_end; + } + + if (err == -EINPROGRESS) + num_async++; + if (!cmsg) { int cerr; @@ -1624,40 +1707,25 @@ int tls_sw_recvmsg(struct sock *sk, goto recv_end; } - to_decrypt = rxm->full_len - tls_ctx->rx.overhead_size; - - if (to_decrypt <= len && !is_kvec && !is_peek) - zc = true; - - err = decrypt_skb_update(sk, skb, &msg->msg_iter, - &chunk, &zc, ctx->async_capable); - if (err < 0 && err != -EINPROGRESS) { - tls_err_abort(sk, EBADMSG); - goto recv_end; - } - - if (err == -EINPROGRESS) { - async = true; - num_async++; + if (async) goto pick_next_record; - } else { - if (!zc) { - if (rxm->full_len > len) { - retain_skb = true; - chunk = len; - } else { - chunk = rxm->full_len; - } - err = skb_copy_datagram_msg(skb, rxm->offset, - msg, chunk); - if (err < 0) - goto recv_end; + if (!zc) { + if (rxm->full_len > len) { + retain_skb = true; + chunk = len; + } else { + chunk = rxm->full_len; + } - if (!is_peek) { - rxm->offset = rxm->offset + chunk; - rxm->full_len = rxm->full_len - chunk; - } + err = skb_copy_datagram_msg(skb, rxm->offset, + msg, chunk); + if (err < 0) + goto recv_end; + + if (!is_peek) { + rxm->offset = rxm->offset + chunk; + rxm->full_len = rxm->full_len - chunk; } } @@ -1757,15 +1825,15 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, if (!skb) goto splice_read_end; - /* splice does not support reading control messages */ - if (ctx->control != TLS_RECORD_TYPE_DATA) { - err = -ENOTSUPP; - goto splice_read_end; - } - if (!ctx->decrypted) { err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false); + /* splice does not support reading control messages */ + if (ctx->control != TLS_RECORD_TYPE_DATA) { + err = -ENOTSUPP; + goto splice_read_end; + } + if (err < 0) { tls_err_abort(sk, EBADMSG); goto splice_read_end; @@ -1833,9 +1901,12 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb) data_len = ((header[4] & 0xFF) | (header[3] << 8)); - cipher_overhead = tls_ctx->rx.tag_size + tls_ctx->rx.iv_size; + cipher_overhead = tls_ctx->rx.tag_size; + if (tls_ctx->crypto_recv.info.version != TLS_1_3_VERSION) + cipher_overhead += tls_ctx->rx.iv_size; - if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead) { + if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead + + tls_ctx->rx.tail_size) { ret = -EMSGSIZE; goto read_failure; } @@ -1844,12 +1915,12 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb) goto read_failure; } - if (header[1] != TLS_VERSION_MINOR(tls_ctx->crypto_recv.info.version) || - header[2] != TLS_VERSION_MAJOR(tls_ctx->crypto_recv.info.version)) { + /* Note that both TLS1.3 and TLS1.2 use TLS_1_2 version here */ + if (header[1] != TLS_1_2_VERSION_MINOR || + header[2] != TLS_1_2_VERSION_MAJOR) { ret = -EINVAL; goto read_failure; } - #ifdef CONFIG_TLS_DEVICE handle_device_resync(strp->sk, TCP_SKB_CB(skb)->seq + rxm->offset, *(u64*)tls_ctx->rx.rec_seq); @@ -1901,7 +1972,9 @@ void tls_sw_free_resources_tx(struct sock *sk) if (atomic_read(&ctx->encrypt_pending)) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + release_sock(sk); cancel_delayed_work_sync(&ctx->tx_work.work); + lock_sock(sk); /* Tx whatever records we can transmit and abandon the rest */ tls_tx_records(sk, -1); @@ -1995,6 +2068,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) { struct tls_crypto_info *crypto_info; struct tls12_crypto_info_aes_gcm_128 *gcm_128_info; + struct tls12_crypto_info_aes_gcm_256 *gcm_256_info; struct tls_sw_context_tx *sw_ctx_tx = NULL; struct tls_sw_context_rx *sw_ctx_rx = NULL; struct cipher_context *cctx; @@ -2002,7 +2076,8 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) struct strp_callbacks cb; u16 nonce_size, tag_size, iv_size, rec_seq_size; struct crypto_tfm *tfm; - char *iv, *rec_seq; + char *iv, *rec_seq, *key, *salt; + size_t keysize; int rc = 0; if (!ctx) { @@ -2063,6 +2138,24 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq; gcm_128_info = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + keysize = TLS_CIPHER_AES_GCM_128_KEY_SIZE; + key = gcm_128_info->key; + salt = gcm_128_info->salt; + break; + } + case TLS_CIPHER_AES_GCM_256: { + nonce_size = TLS_CIPHER_AES_GCM_256_IV_SIZE; + tag_size = TLS_CIPHER_AES_GCM_256_TAG_SIZE; + iv_size = TLS_CIPHER_AES_GCM_256_IV_SIZE; + iv = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->iv; + rec_seq_size = TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE; + rec_seq = + ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->rec_seq; + gcm_256_info = + (struct tls12_crypto_info_aes_gcm_256 *)crypto_info; + keysize = TLS_CIPHER_AES_GCM_256_KEY_SIZE; + key = gcm_256_info->key; + salt = gcm_256_info->salt; break; } default: @@ -2076,9 +2169,19 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) goto free_priv; } + if (crypto_info->version == TLS_1_3_VERSION) { + nonce_size = 0; + cctx->aad_size = TLS_HEADER_SIZE; + cctx->tail_size = 1; + } else { + cctx->aad_size = TLS_AAD_SPACE_SIZE; + cctx->tail_size = 0; + } + cctx->prepend_size = TLS_HEADER_SIZE + nonce_size; cctx->tag_size = tag_size; - cctx->overhead_size = cctx->prepend_size + cctx->tag_size; + cctx->overhead_size = cctx->prepend_size + cctx->tag_size + + cctx->tail_size; cctx->iv_size = iv_size; cctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL); @@ -2086,7 +2189,8 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) rc = -ENOMEM; goto free_priv; } - memcpy(cctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + /* Note: 128 & 256 bit salt are the same size */ + memcpy(cctx->iv, salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); cctx->rec_seq_size = rec_seq_size; cctx->rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL); @@ -2106,8 +2210,8 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) ctx->push_pending_record = tls_sw_push_pending_record; - rc = crypto_aead_setkey(*aead, gcm_128_info->key, - TLS_CIPHER_AES_GCM_128_KEY_SIZE); + rc = crypto_aead_setkey(*aead, key, keysize); + if (rc) goto free_aead; @@ -2117,8 +2221,12 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) if (sw_ctx_rx) { tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv); - sw_ctx_rx->async_capable = - tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; + + if (crypto_info->version == TLS_1_3_VERSION) + sw_ctx_rx->async_capable = false; + else + sw_ctx_rx->async_capable = + tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; /* Set up strparser */ memset(&cb, 0, sizeof(cb)); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index a60df252d3cc..d892000770cf 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1439,7 +1439,7 @@ static int vsock_stream_setsockopt(struct socket *sock, break; case SO_VM_SOCKETS_CONNECT_TIMEOUT: { - struct timeval tv; + struct __kernel_old_timeval tv; COPY_IN(tv); if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC && tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) { @@ -1517,7 +1517,7 @@ static int vsock_stream_getsockopt(struct socket *sock, break; case SO_VM_SOCKETS_CONNECT_TIMEOUT: { - struct timeval tv; + struct __kernel_old_timeval tv; tv.tv_sec = vsk->connect_timeout / HZ; tv.tv_usec = (vsk->connect_timeout - diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 5d3cce9e8744..15eb5d3d4750 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -75,6 +75,9 @@ static u32 virtio_transport_get_local_cid(void) { struct virtio_vsock *vsock = virtio_vsock_get(); + if (!vsock) + return VMADDR_CID_ANY; + return vsock->guest_cid; } @@ -584,10 +587,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) virtio_vsock_update_guest_cid(vsock); - ret = vsock_core_init(&virtio_transport.transport); - if (ret < 0) - goto out_vqs; - vsock->rx_buf_nr = 0; vsock->rx_buf_max_nr = 0; atomic_set(&vsock->queued_replies, 0); @@ -618,8 +617,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) mutex_unlock(&the_virtio_vsock_mutex); return 0; -out_vqs: - vsock->vdev->config->del_vqs(vsock->vdev); out: kfree(vsock); mutex_unlock(&the_virtio_vsock_mutex); @@ -637,6 +634,9 @@ static void virtio_vsock_remove(struct virtio_device *vdev) flush_work(&vsock->event_work); flush_work(&vsock->send_pkt_work); + /* Reset all connected sockets when the device disappear */ + vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vdev->config->reset(vdev); mutex_lock(&vsock->rx_lock); @@ -669,7 +669,6 @@ static void virtio_vsock_remove(struct virtio_device *vdev) mutex_lock(&the_virtio_vsock_mutex); the_virtio_vsock = NULL; - vsock_core_exit(); mutex_unlock(&the_virtio_vsock_mutex); vdev->config->del_vqs(vdev); @@ -702,14 +701,28 @@ static int __init virtio_vsock_init(void) virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); if (!virtio_vsock_workqueue) return -ENOMEM; + ret = register_virtio_driver(&virtio_vsock_driver); if (ret) - destroy_workqueue(virtio_vsock_workqueue); + goto out_wq; + + ret = vsock_core_init(&virtio_transport.transport); + if (ret) + goto out_vdr; + + return 0; + +out_vdr: + unregister_virtio_driver(&virtio_vsock_driver); +out_wq: + destroy_workqueue(virtio_vsock_workqueue); return ret; + } static void __exit virtio_vsock_exit(void) { + vsock_core_exit(); unregister_virtio_driver(&virtio_vsock_driver); destroy_workqueue(virtio_vsock_workqueue); } diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index c361ce782412..c3d5ab01fba7 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1651,6 +1651,10 @@ static void vmci_transport_cleanup(struct work_struct *work) static void vmci_transport_destruct(struct vsock_sock *vsk) { + /* transport can be NULL if we hit a failure at init() time */ + if (!vmci_trans(vsk)) + return; + /* Ensure that the detach callback doesn't use the sk/vsk * we are about to destruct. */ diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 882d97bdc6bf..550ac9d827fe 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -41,6 +41,8 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, cfg80211_sched_dfs_chan_update(rdev); } + schedule_work(&cfg80211_disconnect_work); + return err; } diff --git a/net/wireless/core.c b/net/wireless/core.c index 623dfe5e211c..b36ad8efb5e5 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1068,6 +1068,8 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync) ASSERT_RTNL(); + flush_work(&wdev->pmsr_free_wk); + nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE); list_del_rcu(&wdev->list); diff --git a/net/wireless/core.h b/net/wireless/core.h index c5d6f3418601..f6b40563dc63 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -445,6 +445,8 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev); bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range, u32 center_freq_khz, u32 bw_khz); +extern struct work_struct cfg80211_disconnect_work; + /** * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable * @wiphy: the wiphy to validate against diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 74150ad95823..e36437abc45a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -250,7 +250,7 @@ nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = { [NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION] = NLA_POLICY_MAX(NLA_U8, 15), [NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST] = - NLA_POLICY_MAX(NLA_U8, 15), + NLA_POLICY_MAX(NLA_U8, 31), [NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES] = { .type = NLA_U8 }, [NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC] = { .type = NLA_FLAG }, @@ -259,15 +259,13 @@ nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = { static const struct nla_policy nl80211_pmsr_req_data_policy[NL80211_PMSR_TYPE_MAX + 1] = { [NL80211_PMSR_TYPE_FTM] = - NLA_POLICY_NESTED(NL80211_PMSR_FTM_REQ_ATTR_MAX, - nl80211_pmsr_ftm_req_attr_policy), + NLA_POLICY_NESTED(nl80211_pmsr_ftm_req_attr_policy), }; static const struct nla_policy nl80211_pmsr_req_attr_policy[NL80211_PMSR_REQ_ATTR_MAX + 1] = { [NL80211_PMSR_REQ_ATTR_DATA] = - NLA_POLICY_NESTED(NL80211_PMSR_TYPE_MAX, - nl80211_pmsr_req_data_policy), + NLA_POLICY_NESTED(nl80211_pmsr_req_data_policy), [NL80211_PMSR_REQ_ATTR_GET_AP_TSF] = { .type = NLA_FLAG }, }; @@ -280,8 +278,7 @@ nl80211_psmr_peer_attr_policy[NL80211_PMSR_PEER_ATTR_MAX + 1] = { */ [NL80211_PMSR_PEER_ATTR_CHAN] = { .type = NLA_NESTED }, [NL80211_PMSR_PEER_ATTR_REQ] = - NLA_POLICY_NESTED(NL80211_PMSR_REQ_ATTR_MAX, - nl80211_pmsr_req_attr_policy), + NLA_POLICY_NESTED(nl80211_pmsr_req_attr_policy), [NL80211_PMSR_PEER_ATTR_RESP] = { .type = NLA_REJECT }, }; @@ -292,8 +289,7 @@ nl80211_pmsr_attr_policy[NL80211_PMSR_ATTR_MAX + 1] = { [NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR] = { .type = NLA_REJECT }, [NL80211_PMSR_ATTR_TYPE_CAPA] = { .type = NLA_REJECT }, [NL80211_PMSR_ATTR_PEERS] = - NLA_POLICY_NESTED_ARRAY(NL80211_PMSR_PEER_ATTR_MAX, - nl80211_psmr_peer_attr_policy), + NLA_POLICY_NESTED_ARRAY(nl80211_psmr_peer_attr_policy), }; const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { @@ -555,8 +551,8 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { }, [NL80211_ATTR_TIMEOUT] = NLA_POLICY_MIN(NLA_U32, 1), [NL80211_ATTR_PEER_MEASUREMENTS] = - NLA_POLICY_NESTED(NL80211_PMSR_ATTR_MAX, - nl80211_pmsr_attr_policy), + NLA_POLICY_NESTED(nl80211_pmsr_attr_policy), + [NL80211_ATTR_AIRTIME_WEIGHT] = NLA_POLICY_MIN(NLA_U16, 1), }; /* policy for the key attributes */ @@ -2278,6 +2274,15 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (nl80211_send_pmsr_capa(rdev, msg)) goto nla_put_failure; + state->split_start++; + break; + case 15: + if (rdev->wiphy.akm_suites && + nla_put(msg, NL80211_ATTR_AKM_SUITES, + sizeof(u32) * rdev->wiphy.n_akm_suites, + rdev->wiphy.akm_suites)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; @@ -4540,6 +4545,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) nl80211_calculate_ap_params(¶ms); + if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT]) + params.flags |= AP_SETTINGS_EXTERNAL_AUTH_SUPPORT; + wdev_lock(wdev); err = rdev_start_ap(rdev, dev, ¶ms); if (!err) { @@ -4851,6 +4859,11 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, PUT_SINFO(PLID, plid, u16); PUT_SINFO(PLINK_STATE, plink_state, u8); PUT_SINFO_U64(RX_DURATION, rx_duration); + PUT_SINFO_U64(TX_DURATION, tx_duration); + + if (wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) + PUT_SINFO(AIRTIME_WEIGHT, airtime_weight, u16); switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: @@ -5470,6 +5483,15 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]); } + if (info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]) + params.airtime_weight = + nla_get_u16(info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]); + + if (params.airtime_weight && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) + return -EOPNOTSUPP; + /* Include parameters for TDLS peer (will check later) */ err = nl80211_set_station_tdls(info, ¶ms); if (err) @@ -5598,6 +5620,15 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.plink_action = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + if (info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]) + params.airtime_weight = + nla_get_u16(info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]); + + if (params.airtime_weight && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) + return -EOPNOTSUPP; + err = nl80211_parse_sta_channel_info(info, ¶ms); if (err) return err; @@ -5803,7 +5834,13 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, pinfo->discovery_timeout)) || ((pinfo->filled & MPATH_INFO_DISCOVERY_RETRIES) && nla_put_u8(msg, NL80211_MPATH_INFO_DISCOVERY_RETRIES, - pinfo->discovery_retries))) + pinfo->discovery_retries)) || + ((pinfo->filled & MPATH_INFO_HOP_COUNT) && + nla_put_u8(msg, NL80211_MPATH_INFO_HOP_COUNT, + pinfo->hop_count)) || + ((pinfo->filled & MPATH_INFO_PATH_CHANGE) && + nla_put_u32(msg, NL80211_MPATH_INFO_PATH_CHANGE, + pinfo->path_change_count))) goto nla_put_failure; nla_nest_end(msg, pinfoattr); @@ -9857,7 +9894,10 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info) } if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && + !(dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP && + wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_AP_PMKSA_CACHING))) return -EOPNOTSUPP; switch (info->genlhdr->cmd) { @@ -13047,7 +13087,9 @@ static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->external_auth) return -EOPNOTSUPP; - if (!info->attrs[NL80211_ATTR_SSID]) + if (!info->attrs[NL80211_ATTR_SSID] && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EINVAL; if (!info->attrs[NL80211_ATTR_BSSID]) @@ -13058,18 +13100,24 @@ static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info) memset(¶ms, 0, sizeof(params)); - params.ssid.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); - if (params.ssid.ssid_len == 0 || - params.ssid.ssid_len > IEEE80211_MAX_SSID_LEN) - return -EINVAL; - memcpy(params.ssid.ssid, nla_data(info->attrs[NL80211_ATTR_SSID]), - params.ssid.ssid_len); + if (info->attrs[NL80211_ATTR_SSID]) { + params.ssid.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); + if (params.ssid.ssid_len == 0 || + params.ssid.ssid_len > IEEE80211_MAX_SSID_LEN) + return -EINVAL; + memcpy(params.ssid.ssid, + nla_data(info->attrs[NL80211_ATTR_SSID]), + params.ssid.ssid_len); + } memcpy(params.bssid, nla_data(info->attrs[NL80211_ATTR_BSSID]), ETH_ALEN); params.status = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]); + if (info->attrs[NL80211_ATTR_PMKID]) + params.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]); + return rdev_external_auth(rdev, dev, ¶ms); } diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index de9286703280..0216ab555249 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -256,8 +256,7 @@ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info) if (err) goto out_err; } else { - memcpy(req->mac_addr, nla_data(info->attrs[NL80211_ATTR_MAC]), - ETH_ALEN); + memcpy(req->mac_addr, wdev_address(wdev), ETH_ALEN); memset(req->mac_addr_mask, 0xff, ETH_ALEN); } @@ -272,6 +271,7 @@ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info) req->n_peers = count; req->cookie = cfg80211_assign_cookie(rdev); + req->nl_portid = info->snd_portid; err = rdev_start_pmsr(rdev, wdev, req); if (err) @@ -530,14 +530,14 @@ free: } EXPORT_SYMBOL_GPL(cfg80211_pmsr_report); -void cfg80211_pmsr_free_wk(struct work_struct *work) +static void cfg80211_pmsr_process_abort(struct wireless_dev *wdev) { - struct wireless_dev *wdev = container_of(work, struct wireless_dev, - pmsr_free_wk); struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_pmsr_request *req, *tmp; LIST_HEAD(free_list); + lockdep_assert_held(&wdev->mtx); + spin_lock_bh(&wdev->pmsr_lock); list_for_each_entry_safe(req, tmp, &wdev->pmsr_list, list) { if (req->nl_portid) @@ -547,14 +547,22 @@ void cfg80211_pmsr_free_wk(struct work_struct *work) spin_unlock_bh(&wdev->pmsr_lock); list_for_each_entry_safe(req, tmp, &free_list, list) { - wdev_lock(wdev); rdev_abort_pmsr(rdev, wdev, req); - wdev_unlock(wdev); kfree(req); } } +void cfg80211_pmsr_free_wk(struct work_struct *work) +{ + struct wireless_dev *wdev = container_of(work, struct wireless_dev, + pmsr_free_wk); + + wdev_lock(wdev); + cfg80211_pmsr_process_abort(wdev); + wdev_unlock(wdev); +} + void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev) { struct cfg80211_pmsr_request *req; @@ -568,8 +576,8 @@ void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev) spin_unlock_bh(&wdev->pmsr_lock); if (found) - schedule_work(&wdev->pmsr_free_wk); - flush_work(&wdev->pmsr_free_wk); + cfg80211_pmsr_process_abort(wdev); + WARN_ON(!list_empty(&wdev->pmsr_list)); } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index dd58b9909ac9..adfa58fa6536 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2729,9 +2729,7 @@ static void notify_self_managed_wiphys(struct regulatory_request *request) list_for_each_entry(rdev, &cfg80211_rdev_list, list) { wiphy = &rdev->wiphy; if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && - request->initiator == NL80211_REGDOM_SET_BY_USER && - request->user_reg_hint_type == - NL80211_USER_REG_HINT_CELL_BASE) + request->initiator == NL80211_REGDOM_SET_BY_USER) reg_call_notifier(wiphy, request); } } diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f741d8376a46..7d34cb884840 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -667,7 +667,7 @@ static void disconnect_work(struct work_struct *work) rtnl_unlock(); } -static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); +DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); /* diff --git a/net/wireless/util.c b/net/wireless/util.c index cd48cdd582c0..ec30e3732c7b 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -5,7 +5,7 @@ * Copyright 2007-2009 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2019 Intel Corporation */ #include <linux/export.h> #include <linux/bitops.h> @@ -19,6 +19,7 @@ #include <linux/mpls.h> #include <linux/gcd.h> #include <linux/bitfield.h> +#include <linux/nospec.h> #include "core.h" #include "rdev-ops.h" @@ -715,20 +716,25 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, { unsigned int dscp; unsigned char vlan_priority; + unsigned int ret; /* skb->priority values from 256->263 are magic values to * directly indicate a specific 802.1d priority. This is used * to allow 802.1d priority to be passed directly in from VLAN * tags, etc. */ - if (skb->priority >= 256 && skb->priority <= 263) - return skb->priority - 256; + if (skb->priority >= 256 && skb->priority <= 263) { + ret = skb->priority - 256; + goto out; + } if (skb_vlan_tag_present(skb)) { vlan_priority = (skb_vlan_tag_get(skb) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; - if (vlan_priority > 0) - return vlan_priority; + if (vlan_priority > 0) { + ret = vlan_priority; + goto out; + } } switch (skb->protocol) { @@ -747,8 +753,9 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, if (!mpls) return 0; - return (ntohl(mpls->entry) & MPLS_LS_TC_MASK) + ret = (ntohl(mpls->entry) & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT; + goto out; } case htons(ETH_P_80221): /* 802.21 is always network control traffic */ @@ -761,18 +768,24 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, unsigned int i, tmp_dscp = dscp >> 2; for (i = 0; i < qos_map->num_des; i++) { - if (tmp_dscp == qos_map->dscp_exception[i].dscp) - return qos_map->dscp_exception[i].up; + if (tmp_dscp == qos_map->dscp_exception[i].dscp) { + ret = qos_map->dscp_exception[i].up; + goto out; + } } for (i = 0; i < 8; i++) { if (tmp_dscp >= qos_map->up[i].low && - tmp_dscp <= qos_map->up[i].high) - return i; + tmp_dscp <= qos_map->up[i].high) { + ret = i; + goto out; + } } } - return dscp >> 5; + ret = dscp >> 5; +out: + return array_index_nospec(ret, IEEE80211_NUM_TIDS); } EXPORT_SYMBOL(cfg80211_classify8021d); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 06943d9c9835..d522787c7354 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1337,6 +1337,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) wstats.qual.qual = sig + 110; break; } + /* fall through */ case CFG80211_SIGNAL_TYPE_UNSPEC: if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_SIGNAL)) { wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED; @@ -1345,6 +1346,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) wstats.qual.qual = sinfo.signal; break; } + /* fall through */ default: wstats.qual.updated |= IW_QUAL_LEVEL_INVALID; wstats.qual.updated |= IW_QUAL_QUAL_INVALID; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 5121729b8b63..ec3a828672ef 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -352,17 +352,15 @@ static unsigned int x25_new_lci(struct x25_neigh *nb) unsigned int lci = 1; struct sock *sk; - read_lock_bh(&x25_list_lock); - - while ((sk = __x25_find_socket(lci, nb)) != NULL) { + while ((sk = x25_find_socket(lci, nb)) != NULL) { sock_put(sk); if (++lci == 4096) { lci = 0; break; } + cond_resched(); } - read_unlock_bh(&x25_list_lock); return lci; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 934492bad8e0..ba0a4048c846 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -680,16 +680,6 @@ static void xfrm_hash_resize(struct work_struct *work) mutex_unlock(&hash_resize_mutex); } -static void xfrm_hash_reset_inexact_table(struct net *net) -{ - struct xfrm_pol_inexact_bin *b; - - lockdep_assert_held(&net->xfrm.xfrm_policy_lock); - - list_for_each_entry(b, &net->xfrm.inexact_bins, inexact_bins) - INIT_HLIST_HEAD(&b->hhead); -} - /* Make sure *pol can be inserted into fastbin. * Useful to check that later insert requests will be sucessful * (provided xfrm_policy_lock is held throughout). @@ -833,13 +823,13 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net, u16 family) { unsigned int matched_s, matched_d; - struct hlist_node *newpos = NULL; struct xfrm_policy *policy, *p; matched_s = 0; matched_d = 0; list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { + struct hlist_node *newpos = NULL; bool matches_s, matches_d; if (!policy->bydst_reinsert) @@ -849,16 +839,19 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net, policy->bydst_reinsert = false; hlist_for_each_entry(p, &n->hhead, bydst) { - if (policy->priority >= p->priority) + if (policy->priority > p->priority) + newpos = &p->bydst; + else if (policy->priority == p->priority && + policy->pos > p->pos) newpos = &p->bydst; else break; } if (newpos) - hlist_add_behind(&policy->bydst, newpos); + hlist_add_behind_rcu(&policy->bydst, newpos); else - hlist_add_head(&policy->bydst, &n->hhead); + hlist_add_head_rcu(&policy->bydst, &n->hhead); /* paranoia checks follow. * Check that the reinserted policy matches at least @@ -893,12 +886,13 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net, struct rb_root *new, u16 family) { - struct rb_node **p, *parent = NULL; struct xfrm_pol_inexact_node *node; + struct rb_node **p, *parent; /* we should not have another subtree here */ WARN_ON_ONCE(!RB_EMPTY_ROOT(&n->root)); - +restart: + parent = NULL; p = &new->rb_node; while (*p) { u8 prefixlen; @@ -918,12 +912,11 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net, } else { struct xfrm_policy *tmp; - hlist_for_each_entry(tmp, &node->hhead, bydst) - tmp->bydst_reinsert = true; - hlist_for_each_entry(tmp, &n->hhead, bydst) + hlist_for_each_entry(tmp, &n->hhead, bydst) { tmp->bydst_reinsert = true; + hlist_del_rcu(&tmp->bydst); + } - INIT_HLIST_HEAD(&node->hhead); xfrm_policy_inexact_list_reinsert(net, node, family); if (node->prefixlen == n->prefixlen) { @@ -935,8 +928,7 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net, kfree_rcu(n, rcu); n = node; n->prefixlen = prefixlen; - *p = new->rb_node; - parent = NULL; + goto restart; } } @@ -965,12 +957,11 @@ static void xfrm_policy_inexact_node_merge(struct net *net, family); } - hlist_for_each_entry(tmp, &v->hhead, bydst) - tmp->bydst_reinsert = true; - hlist_for_each_entry(tmp, &n->hhead, bydst) + hlist_for_each_entry(tmp, &v->hhead, bydst) { tmp->bydst_reinsert = true; + hlist_del_rcu(&tmp->bydst); + } - INIT_HLIST_HEAD(&n->hhead); xfrm_policy_inexact_list_reinsert(net, n, family); } @@ -1235,6 +1226,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); spin_lock_bh(&net->xfrm.xfrm_policy_lock); + write_seqcount_begin(&xfrm_policy_hash_generation); /* make sure that we can insert the indirect policies again before * we start with destructive action. @@ -1278,10 +1270,14 @@ static void xfrm_hash_rebuild(struct work_struct *work) } /* reset the bydst and inexact table in all directions */ - xfrm_hash_reset_inexact_table(net); - for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); + struct hlist_node *n; + + hlist_for_each_entry_safe(policy, n, + &net->xfrm.policy_inexact[dir], + bydst_inexact_list) + hlist_del_init(&policy->bydst_inexact_list); + hmask = net->xfrm.policy_bydst[dir].hmask; odst = net->xfrm.policy_bydst[dir].table; for (i = hmask; i >= 0; i--) @@ -1313,6 +1309,9 @@ static void xfrm_hash_rebuild(struct work_struct *work) newpos = NULL; chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); + + hlist_del_rcu(&policy->bydst); + if (!chain) { void *p = xfrm_policy_inexact_insert(policy, dir, 0); @@ -1334,6 +1333,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) out_unlock: __xfrm_policy_inexact_flush(net); + write_seqcount_end(&xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); mutex_unlock(&hash_resize_mutex); @@ -2600,7 +2600,10 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst_copy_metrics(dst1, dst); if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { - __u32 mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]); + __u32 mark = 0; + + if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m) + mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]); family = xfrm[i]->props.family; dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif, diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 277c1c46fe94..c6d26afcf89d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1488,10 +1488,15 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) if (!ut[i].family) ut[i].family = family; - if ((ut[i].mode == XFRM_MODE_TRANSPORT) && - (ut[i].family != prev_family)) - return -EINVAL; - + switch (ut[i].mode) { + case XFRM_MODE_TUNNEL: + case XFRM_MODE_BEET: + break; + default: + if (ut[i].family != prev_family) + return -EINVAL; + break; + } if (ut[i].mode >= XFRM_MODE_MAX) return -EINVAL; |