From c7c1abfd6d42be8f09d390ab912cd84983000fa2 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Fri, 26 Feb 2021 23:35:05 +0200 Subject: vti: fix ipv4 pmtu check to honor ip header df Frag needed should only be sent if the header enables DF. This fix allows packets larger than MTU to pass the vti interface and be fragmented after encapsulation, aligning behavior with non-vti xfrm. Fixes: d6af1a31cc72 ("vti: Add pmtu handling to vti_xmit.") Signed-off-by: Eyal Birger Reviewed-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index abc171e79d3e..613741384490 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -218,7 +218,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, } if (dst->flags & DST_XFRM_QUEUE) - goto queued; + goto xmit; if (!vti_state_check(dst->xfrm, parms->iph.daddr, parms->iph.saddr)) { dev->stats.tx_carrier_errors++; @@ -238,6 +238,8 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, if (skb->len > mtu) { skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { + if (!(ip_hdr(skb)->frag_off & htons(IP_DF))) + goto xmit; icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); } else { @@ -251,7 +253,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } -queued: +xmit: skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; -- cgit v1.2.3 From 9ab1265d52314fce1b51e8665ea6dbc9ac1a027c Mon Sep 17 00:00:00 2001 From: Evan Nimmo Date: Tue, 2 Mar 2021 08:00:04 +1300 Subject: xfrm: Use actual socket sk instead of skb socket for xfrm_output_resume A situation can occur where the interface bound to the sk is different to the interface bound to the sk attached to the skb. The interface bound to the sk is the correct one however this information is lost inside xfrm_output2 and instead the sk on the skb is used in xfrm_output_resume instead. This assumes that the sk bound interface and the bound interface attached to the sk within the skb are the same which can lead to lookup failures inside ip_route_me_harder resulting in the packet being dropped. We have an l2tp v3 tunnel with ipsec protection. The tunnel is in the global VRF however we have an encapsulated dot1q tunnel interface that is within a different VRF. We also have a mangle rule that marks the packets causing them to be processed inside ip_route_me_harder. Prior to commit 31c70d5956fc ("l2tp: keep original skb ownership") this worked fine as the sk attached to the skb was changed from the dot1q encapsulated interface to the sk for the tunnel which meant the interface bound to the sk and the interface bound to the skb were identical. Commit 46d6c5ae953c ("netfilter: use actual socket sk rather than skb sk when routing harder") fixed some of these issues however a similar problem existed in the xfrm code. Fixes: 31c70d5956fc ("l2tp: keep original skb ownership") Signed-off-by: Evan Nimmo Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- net/ipv4/ah4.c | 2 +- net/ipv4/esp4.c | 2 +- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 2 +- net/xfrm/xfrm_output.c | 10 +++++----- 6 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b2a06f10b62c..bfbc7810df94 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1557,7 +1557,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb, int xfrm_trans_queue(struct sk_buff *skb, int (*finish)(struct net *, struct sock *, struct sk_buff *)); -int xfrm_output_resume(struct sk_buff *skb, int err); +int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err); int xfrm_output(struct sock *sk, struct sk_buff *skb); #if IS_ENABLED(CONFIG_NET_PKTGEN) diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index d99e1be94019..36ed85bf2ad5 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -141,7 +141,7 @@ static void ah_output_done(struct crypto_async_request *base, int err) } kfree(AH_SKB_CB(skb)->tmp); - xfrm_output_resume(skb, err); + xfrm_output_resume(skb->sk, skb, err); } static int ah_output(struct xfrm_state *x, struct sk_buff *skb) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index a3271ec3e162..4b834bbf95e0 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -279,7 +279,7 @@ static void esp_output_done(struct crypto_async_request *base, int err) x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) esp_output_tail_tcp(x, skb); else - xfrm_output_resume(skb, err); + xfrm_output_resume(skb->sk, skb, err); } } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 440080da805b..080ee7f44c64 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -316,7 +316,7 @@ static void ah6_output_done(struct crypto_async_request *base, int err) } kfree(AH_SKB_CB(skb)->tmp); - xfrm_output_resume(skb, err); + xfrm_output_resume(skb->sk, skb, err); } static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 153ad103ba74..727d791ed5e6 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -314,7 +314,7 @@ static void esp_output_done(struct crypto_async_request *base, int err) x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) esp_output_tail_tcp(x, skb); else - xfrm_output_resume(skb, err); + xfrm_output_resume(skb->sk, skb, err); } } diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index a7ab19353313..b81ca117dac7 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -503,22 +503,22 @@ out: return err; } -int xfrm_output_resume(struct sk_buff *skb, int err) +int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err) { struct net *net = xs_net(skb_dst(skb)->xfrm); while (likely((err = xfrm_output_one(skb, err)) == 0)) { nf_reset_ct(skb); - err = skb_dst(skb)->ops->local_out(net, skb->sk, skb); + err = skb_dst(skb)->ops->local_out(net, sk, skb); if (unlikely(err != 1)) goto out; if (!skb_dst(skb)->xfrm) - return dst_output(net, skb->sk, skb); + return dst_output(net, sk, skb); err = nf_hook(skb_dst(skb)->ops->family, - NF_INET_POST_ROUTING, net, skb->sk, skb, + NF_INET_POST_ROUTING, net, sk, skb, NULL, skb_dst(skb)->dev, xfrm_output2); if (unlikely(err != 1)) goto out; @@ -534,7 +534,7 @@ EXPORT_SYMBOL_GPL(xfrm_output_resume); static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { - return xfrm_output_resume(skb, 1); + return xfrm_output_resume(sk, skb, 1); } static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb) -- cgit v1.2.3 From 154deab6a3ba47792936edf77f2f13a1cbc4351d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 19 Mar 2021 15:35:07 +0800 Subject: esp: delete NETIF_F_SCTP_CRC bit from features for esp offload Now in esp4/6_gso_segment(), before calling inner proto .gso_segment, NETIF_F_CSUM_MASK bits are deleted, as HW won't be able to do the csum for inner proto due to the packet encrypted already. So the UDP/TCP packet has to do the checksum on its own .gso_segment. But SCTP is using CRC checksum, and for that NETIF_F_SCTP_CRC should be deleted to make SCTP do the csum in own .gso_segment as well. In Xiumei's testing with SCTP over IPsec/veth, the packets are kept dropping due to the wrong CRC checksum. Reported-by: Xiumei Mu Fixes: 7862b4058b9f ("esp: Add gso handlers for esp4 and esp6") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv4/esp4_offload.c | 6 ++++-- net/ipv6/esp6_offload.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 601f5fbfc63f..ed3de486ea34 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -217,10 +217,12 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, if ((!(skb->dev->gso_partial_features & NETIF_F_HW_ESP) && !(features & NETIF_F_HW_ESP)) || x->xso.dev != skb->dev) - esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); + esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK | + NETIF_F_SCTP_CRC); else if (!(features & NETIF_F_HW_ESP_TX_CSUM) && !(skb->dev->gso_partial_features & NETIF_F_HW_ESP_TX_CSUM)) - esp_features = features & ~NETIF_F_CSUM_MASK; + esp_features = features & ~(NETIF_F_CSUM_MASK | + NETIF_F_SCTP_CRC); xo->flags |= XFRM_GSO_SEGMENT; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 1ca516fb30e1..f35203ab39f5 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -254,9 +254,11 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, skb->encap_hdr_csum = 1; if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) - esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); + esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK | + NETIF_F_SCTP_CRC); else if (!(features & NETIF_F_HW_ESP_TX_CSUM)) - esp_features = features & ~NETIF_F_CSUM_MASK; + esp_features = features & ~(NETIF_F_CSUM_MASK | + NETIF_F_SCTP_CRC); xo->flags |= XFRM_GSO_SEGMENT; -- cgit v1.2.3 From c7dbf4c08868d9db89b8bfe8f8245ca61b01ed2f Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 26 Mar 2021 09:44:48 +0100 Subject: xfrm: Provide private skb extensions for segmented and hw offloaded ESP packets Commit 94579ac3f6d0 ("xfrm: Fix double ESP trailer insertion in IPsec crypto offload.") added a XFRM_XMIT flag to avoid duplicate ESP trailer insertion on HW offload. This flag is set on the secpath that is shared amongst segments. This lead to a situation where some segments are not transformed correctly when segmentation happens at layer 3. Fix this by using private skb extensions for segmented and hw offloaded ESP packets. Fixes: 94579ac3f6d0 ("xfrm: Fix double ESP trailer insertion in IPsec crypto offload.") Signed-off-by: Steffen Klassert --- net/ipv4/esp4_offload.c | 11 ++++++++++- net/ipv6/esp6_offload.c | 11 ++++++++++- net/xfrm/xfrm_device.c | 2 -- 3 files changed, 20 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index ed3de486ea34..33687cf58286 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -314,8 +314,17 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ ip_hdr(skb)->tot_len = htons(skb->len); ip_send_check(ip_hdr(skb)); - if (hw_offload) + if (hw_offload) { + if (!skb_ext_add(skb, SKB_EXT_SEC_PATH)) + return -ENOMEM; + + xo = xfrm_offload(skb); + if (!xo) + return -EINVAL; + + xo->flags |= XFRM_XMIT; return 0; + } err = esp_output_tail(x, skb, &esp); if (err) diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index f35203ab39f5..4af56affaafd 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -348,8 +348,17 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features ipv6_hdr(skb)->payload_len = htons(len); - if (hw_offload) + if (hw_offload) { + if (!skb_ext_add(skb, SKB_EXT_SEC_PATH)) + return -ENOMEM; + + xo = xfrm_offload(skb); + if (!xo) + return -EINVAL; + + xo->flags |= XFRM_XMIT; return 0; + } err = esp6_output_tail(x, skb, &esp); if (err) diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index edf11893dbe8..6d6917b68856 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -134,8 +134,6 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur return skb; } - xo->flags |= XFRM_XMIT; - if (skb_is_gso(skb) && unlikely(x->xso.dev != dev)) { struct sk_buff *segs; -- cgit v1.2.3 From 98184612aca0a9ee42b8eb0262a49900ee9eef0d Mon Sep 17 00:00:00 2001 From: Norman Maurer Date: Thu, 1 Apr 2021 08:59:17 +0200 Subject: net: udp: Add support for getsockopt(..., ..., UDP_GRO, ..., ...); Support for UDP_GRO was added in the past but the implementation for getsockopt was missed which did lead to an error when we tried to retrieve the setting for UDP_GRO. This patch adds the missing switch case for UDP_GRO Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.") Signed-off-by: Norman Maurer Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/udp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4a0478b17243..99d743eb9dc4 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2754,6 +2754,10 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, val = up->gso_size; break; + case UDP_GRO: + val = up->gro_enabled; + break; + /* The following two cannot be changed on UDP sockets, the return is * always 0 (which corresponds to the full checksum coverage of UDP). */ case UDPLITE_SEND_CSCOV: -- cgit v1.2.3 From 3583a4e8d77d44697a21437227dd53fc6e7b2cb5 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 7 Apr 2021 08:59:12 -0700 Subject: ipv6: report errors for iftoken via netlink extack Setting iftoken can fail for several different reasons but there and there was no report to user as to the cause. Add netlink extended errors to the processing of the request. This requires adding additional argument through rtnl_af_ops set_link_af callback. Reported-by: Hongren Zheng Signed-off-by: Stephen Hemminger Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 4 ++-- net/core/rtnetlink.c | 2 +- net/ipv4/devinet.c | 3 ++- net/ipv6/addrconf.c | 32 ++++++++++++++++++++++++++------ 4 files changed, 31 insertions(+), 10 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 4da61c950e93..479f60ef54c0 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -147,8 +147,8 @@ struct rtnl_af_ops { int (*validate_link_af)(const struct net_device *dev, const struct nlattr *attr); int (*set_link_af)(struct net_device *dev, - const struct nlattr *attr); - + const struct nlattr *attr, + struct netlink_ext_ack *extack); int (*fill_stats_af)(struct sk_buff *skb, const struct net_device *dev); size_t (*get_stats_af_size)(const struct net_device *dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1bdcb33fb561..3485b16a7ff3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2863,7 +2863,7 @@ static int do_setlink(const struct sk_buff *skb, BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af)))); - err = af_ops->set_link_af(dev, af); + err = af_ops->set_link_af(dev, af, extack); if (err < 0) { rcu_read_unlock(); goto errout; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 75f67994fc85..2e35f68da40a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1978,7 +1978,8 @@ static int inet_validate_link_af(const struct net_device *dev, return 0; } -static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) +static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla, + struct netlink_ext_ack *extack) { struct in_device *in_dev = __in_dev_get_rcu(dev); struct nlattr *a, *tb[IFLA_INET_MAX+1]; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f2337fb756ac..a9e53f5942fa 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5669,7 +5669,8 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev, return 0; } -static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) +static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token, + struct netlink_ext_ack *extack) { struct inet6_ifaddr *ifp; struct net_device *dev = idev->dev; @@ -5680,12 +5681,29 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) if (!token) return -EINVAL; - if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) + + if (dev->flags & IFF_LOOPBACK) { + NL_SET_ERR_MSG_MOD(extack, "Device is loopback"); return -EINVAL; - if (!ipv6_accept_ra(idev)) + } + + if (dev->flags & IFF_NOARP) { + NL_SET_ERR_MSG_MOD(extack, + "Device does not do neighbour discovery"); + return -EINVAL; + } + + if (!ipv6_accept_ra(idev)) { + NL_SET_ERR_MSG_MOD(extack, + "Router advertisement is disabled on device"); return -EINVAL; - if (idev->cnf.rtr_solicits == 0) + } + + if (idev->cnf.rtr_solicits == 0) { + NL_SET_ERR_MSG(extack, + "Router solicitation is disabled on device"); return -EINVAL; + } write_lock_bh(&idev->lock); @@ -5793,7 +5811,8 @@ static int inet6_validate_link_af(const struct net_device *dev, return 0; } -static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) +static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla, + struct netlink_ext_ack *extack) { struct inet6_dev *idev = __in6_dev_get(dev); struct nlattr *tb[IFLA_INET6_MAX + 1]; @@ -5806,7 +5825,8 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) BUG(); if (tb[IFLA_INET6_TOKEN]) { - err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN])); + err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]), + extack); if (err) return err; } -- cgit v1.2.3