summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-07-14 19:24:32 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2021-07-14 19:24:32 +0300
commit8096acd7442e613fad0354fc8dfdb2003cceea0b (patch)
treead8b748475fa87fe7c3b6f9cd00da8d7b8d078bd /net/core
parentd1d488d813703618f0dd93f0e4c4a05928114aa8 (diff)
parentbcb9928a155444dbd212473e60241ca0a7f641e1 (diff)
downloadlinux-8096acd7442e613fad0354fc8dfdb2003cceea0b.tar.xz
Merge tag 'net-5.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski. "Including fixes from bpf and netfilter. Current release - regressions: - sock: fix parameter order in sock_setsockopt() Current release - new code bugs: - netfilter: nft_last: - fix incorrect arithmetic when restoring last used - honor NFTA_LAST_SET on restoration Previous releases - regressions: - udp: properly flush normal packet at GRO time - sfc: ensure correct number of XDP queues; don't allow enabling the feature if there isn't sufficient resources to Tx from any CPU - dsa: sja1105: fix address learning getting disabled on the CPU port - mptcp: addresses a rmem accounting issue that could keep packets in subflow receive buffers longer than necessary, delaying MPTCP-level ACKs - ip_tunnel: fix mtu calculation for ETHER tunnel devices - do not reuse skbs allocated from skbuff_fclone_cache in the napi skb cache, we'd try to return them to the wrong slab cache - tcp: consistently disable header prediction for mptcp Previous releases - always broken: - bpf: fix subprog poke descriptor tracking use-after-free - ipv6: - allocate enough headroom in ip6_finish_output2() in case iptables TEE is used - tcp: drop silly ICMPv6 packet too big messages to avoid expensive and pointless lookups (which may serve as a DDOS vector) - make sure fwmark is copied in SYNACK packets - fix 'disable_policy' for forwarded packets (align with IPv4) - netfilter: conntrack: - do not renew entry stuck in tcp SYN_SENT state - do not mark RST in the reply direction coming after SYN packet for an out-of-sync entry - mptcp: cleanly handle error conditions with MP_JOIN and syncookies - mptcp: fix double free when rejecting a join due to port mismatch - validate lwtstate->data before returning from skb_tunnel_info() - tcp: call sk_wmem_schedule before sk_mem_charge in zerocopy path - mt76: mt7921: continue to probe driver when fw already downloaded - bonding: fix multiple issues with offloading IPsec to (thru?) bond - stmmac: ptp: fix issues around Qbv support and setting time back - bcmgenet: always clear wake-up based on energy detection Misc: - sctp: move 198 addresses from unusable to private scope - ptp: support virtual clocks and timestamping - openvswitch: optimize operation for key comparison" * tag 'net-5.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (158 commits) net: dsa: properly check for the bridge_leave methods in dsa_switch_bridge_leave() sfc: add logs explaining XDP_TX/REDIRECT is not available sfc: ensure correct number of XDP queues sfc: fix lack of XDP TX queues - error XDP TX failed (-22) net: fddi: fix UAF in fza_probe net: dsa: sja1105: fix address learning getting disabled on the CPU port net: ocelot: fix switchdev objects synced for wrong netdev with LAG offload net: Use nlmsg_unicast() instead of netlink_unicast() octeontx2-pf: Fix uninitialized boolean variable pps ipv6: allocate enough headroom in ip6_finish_output2() net: hdlc: rename 'mod_init' & 'mod_exit' functions to be module-specific net: bridge: multicast: fix MRD advertisement router port marking race net: bridge: multicast: fix PIM hello router port marking race net: phy: marvell10g: fix differentiation of 88X3310 from 88X3340 dsa: fix for_each_child.cocci warnings virtio_net: check virtqueue_add_sgs() return value mptcp: properly account bulk freed memory selftests: mptcp: fix case multiple subflows limited by server mptcp: avoid processing packet if a subflow reset mptcp: fix syncookie process if mptcp can not_accept new subflow ...
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c16
-rw-r--r--net/core/skbuff.c1
-rw-r--r--net/core/sock.c71
3 files changed, 83 insertions, 5 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index c253c2aafe97..64b21f0a2048 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6008,6 +6008,19 @@ static void gro_list_prepare(const struct list_head *head,
diffs = memcmp(skb_mac_header(p),
skb_mac_header(skb),
maclen);
+
+ diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);
+#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ if (!diffs) {
+ struct tc_skb_ext *skb_ext = skb_ext_find(skb, TC_SKB_EXT);
+ struct tc_skb_ext *p_ext = skb_ext_find(p, TC_SKB_EXT);
+
+ diffs |= (!!p_ext) ^ (!!skb_ext);
+ if (!diffs && unlikely(skb_ext))
+ diffs |= p_ext->chain ^ skb_ext->chain;
+ }
+#endif
+
NAPI_GRO_CB(p)->same_flow = !diffs;
}
}
@@ -6221,6 +6234,8 @@ static gro_result_t napi_skb_finish(struct napi_struct *napi,
case GRO_MERGED_FREE:
if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
napi_skb_free_stolen_head(skb);
+ else if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
+ __kfree_skb(skb);
else
__kfree_skb_defer(skb);
break;
@@ -6270,6 +6285,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb_shinfo(skb)->gso_type = 0;
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
skb_ext_reset(skb);
+ nf_reset_ct(skb);
napi->skb = skb;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 12aabcda6db2..f63de967ac25 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -943,6 +943,7 @@ void __kfree_skb_defer(struct sk_buff *skb)
void napi_skb_free_stolen_head(struct sk_buff *skb)
{
+ nf_reset_ct(skb);
skb_dst_drop(skb);
skb_ext_put(skb);
napi_skb_cache_put(skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index ba1c0f75cd45..a3eea6e0b30a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -139,6 +139,8 @@
#include <net/tcp.h>
#include <net/busy_poll.h>
+#include <linux/ethtool.h>
+
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
@@ -810,8 +812,47 @@ void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
}
}
-int sock_set_timestamping(struct sock *sk, int optname, int val)
+static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
+{
+ struct net *net = sock_net(sk);
+ struct net_device *dev = NULL;
+ bool match = false;
+ int *vclock_index;
+ int i, num;
+
+ if (sk->sk_bound_dev_if)
+ dev = dev_get_by_index(net, sk->sk_bound_dev_if);
+
+ if (!dev) {
+ pr_err("%s: sock not bind to device\n", __func__);
+ return -EOPNOTSUPP;
+ }
+
+ num = ethtool_get_phc_vclocks(dev, &vclock_index);
+ for (i = 0; i < num; i++) {
+ if (*(vclock_index + i) == phc_index) {
+ match = true;
+ break;
+ }
+ }
+
+ if (num > 0)
+ kfree(vclock_index);
+
+ if (!match)
+ return -EINVAL;
+
+ sk->sk_bind_phc = phc_index;
+
+ return 0;
+}
+
+int sock_set_timestamping(struct sock *sk, int optname,
+ struct so_timestamping timestamping)
{
+ int val = timestamping.flags;
+ int ret;
+
if (val & ~SOF_TIMESTAMPING_MASK)
return -EINVAL;
@@ -832,6 +873,12 @@ int sock_set_timestamping(struct sock *sk, int optname, int val)
!(val & SOF_TIMESTAMPING_OPT_TSONLY))
return -EINVAL;
+ if (val & SOF_TIMESTAMPING_BIND_PHC) {
+ ret = sock_timestamping_bind_phc(sk, timestamping.bind_phc);
+ if (ret)
+ return ret;
+ }
+
sk->sk_tsflags = val;
sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
@@ -907,6 +954,7 @@ EXPORT_SYMBOL(sock_set_mark);
int sock_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
+ struct so_timestamping timestamping;
struct sock_txtime sk_txtime;
struct sock *sk = sock->sk;
int val;
@@ -1068,12 +1116,22 @@ set_sndbuf:
case SO_TIMESTAMP_NEW:
case SO_TIMESTAMPNS_OLD:
case SO_TIMESTAMPNS_NEW:
- sock_set_timestamp(sk, valbool, optname);
+ sock_set_timestamp(sk, optname, valbool);
break;
case SO_TIMESTAMPING_NEW:
case SO_TIMESTAMPING_OLD:
- ret = sock_set_timestamping(sk, optname, val);
+ if (optlen == sizeof(timestamping)) {
+ if (copy_from_sockptr(&timestamping, optval,
+ sizeof(timestamping))) {
+ ret = -EFAULT;
+ break;
+ }
+ } else {
+ memset(&timestamping, 0, sizeof(timestamping));
+ timestamping.flags = val;
+ }
+ ret = sock_set_timestamping(sk, optname, timestamping);
break;
case SO_RCVLOWAT:
@@ -1201,7 +1259,7 @@ set_sndbuf:
if (val < 0)
ret = -EINVAL;
else
- sk->sk_ll_usec = val;
+ WRITE_ONCE(sk->sk_ll_usec, val);
}
break;
case SO_PREFER_BUSY_POLL:
@@ -1348,6 +1406,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
struct __kernel_old_timeval tm;
struct __kernel_sock_timeval stm;
struct sock_txtime txtime;
+ struct so_timestamping timestamping;
} v;
int lv = sizeof(int);
@@ -1451,7 +1510,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_TIMESTAMPING_OLD:
- v.val = sk->sk_tsflags;
+ lv = sizeof(v.timestamping);
+ v.timestamping.flags = sk->sk_tsflags;
+ v.timestamping.bind_phc = sk->sk_bind_phc;
break;
case SO_RCVTIMEO_OLD: