diff options
-rw-r--r-- | include/net/gro.h | 1 | ||||
-rw-r--r-- | include/net/tcp.h | 5 | ||||
-rw-r--r-- | net/core/gro.c | 27 | ||||
-rw-r--r-- | net/ipv4/tcp_offload.c | 214 | ||||
-rw-r--r-- | net/ipv4/udp_offload.c | 27 | ||||
-rw-r--r-- | net/ipv6/tcpv6_offload.c | 120 |
6 files changed, 325 insertions, 69 deletions
diff --git a/include/net/gro.h b/include/net/gro.h index c1d4ca0463a1..5df8bf318197 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -438,6 +438,7 @@ static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb, } int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); +int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb); /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ static inline void gro_normal_list(struct napi_struct *napi) diff --git a/include/net/tcp.h b/include/net/tcp.h index 0a51e6a45bce..8f63a163c7de 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2191,7 +2191,10 @@ void tcp_v4_destroy_sock(struct sock *sk); struct sk_buff *tcp_gso_segment(struct sk_buff *skb, netdev_features_t features); -struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb); +struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb); +struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th); +struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb, + struct tcphdr *th); INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)); INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff)); diff --git a/net/core/gro.c b/net/core/gro.c index 99a45a5211c9..e2f84947fb74 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -231,6 +231,33 @@ done: return 0; } +int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) +{ + if (unlikely(p->len + skb->len >= 65536)) + return -E2BIG; + + if (NAPI_GRO_CB(p)->last == p) + skb_shinfo(p)->frag_list = skb; + else + NAPI_GRO_CB(p)->last->next = skb; + + skb_pull(skb, skb_gro_offset(skb)); + + NAPI_GRO_CB(p)->last = skb; + NAPI_GRO_CB(p)->count++; + p->data_len += skb->len; + + /* sk ownership - if any - completely transferred to the aggregated packet */ + skb->destructor = NULL; + skb->sk = NULL; + p->truesize += skb->truesize; + p->len += skb->len; + + NAPI_GRO_CB(skb)->same_flow = 1; + + return 0; +} + static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) { diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index fab0973f995b..c90704befd7b 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -28,6 +28,70 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, } } +static void __tcpv4_gso_segment_csum(struct sk_buff *seg, + __be32 *oldip, __be32 newip, + __be16 *oldport, __be16 newport) +{ + struct tcphdr *th; + struct iphdr *iph; + + if (*oldip == newip && *oldport == newport) + return; + + th = tcp_hdr(seg); + iph = ip_hdr(seg); + + inet_proto_csum_replace4(&th->check, seg, *oldip, newip, true); + inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false); + *oldport = newport; + + csum_replace4(&iph->check, *oldip, newip); + *oldip = newip; +} + +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs) +{ + const struct tcphdr *th; + const struct iphdr *iph; + struct sk_buff *seg; + struct tcphdr *th2; + struct iphdr *iph2; + + seg = segs; + th = tcp_hdr(seg); + iph = ip_hdr(seg); + th2 = tcp_hdr(seg->next); + iph2 = ip_hdr(seg->next); + + if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) && + iph->daddr == iph2->daddr && iph->saddr == iph2->saddr) + return segs; + + while ((seg = seg->next)) { + th2 = tcp_hdr(seg); + iph2 = ip_hdr(seg); + + __tcpv4_gso_segment_csum(seg, + &iph2->saddr, iph->saddr, + &th2->source, th->source); + __tcpv4_gso_segment_csum(seg, + &iph2->daddr, iph->daddr, + &th2->dest, th->dest); + } + + return segs; +} + +static struct sk_buff *__tcp4_gso_segment_list(struct sk_buff *skb, + netdev_features_t features) +{ + skb = skb_segment_list(skb, features, skb_mac_header_len(skb)); + if (IS_ERR(skb)) + return skb; + + return __tcpv4_gso_segment_list_csum(skb); +} + static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@ -37,6 +101,9 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct tcphdr))) return ERR_PTR(-EINVAL); + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) + return __tcp4_gso_segment_list(skb, features); + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { const struct iphdr *iph = ip_hdr(skb); struct tcphdr *th = tcp_hdr(skb); @@ -178,61 +245,76 @@ out: return segs; } -struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb) +struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th) { - struct sk_buff *pp = NULL; + struct tcphdr *th2; struct sk_buff *p; + + list_for_each_entry(p, head, list) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + th2 = tcp_hdr(p); + if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + return p; + } + + return NULL; +} + +struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb) +{ + unsigned int thlen, hlen, off; struct tcphdr *th; - struct tcphdr *th2; - unsigned int len; - unsigned int thlen; - __be32 flags; - unsigned int mss = 1; - unsigned int hlen; - unsigned int off; - int flush = 1; - int i; off = skb_gro_offset(skb); hlen = off + sizeof(*th); th = skb_gro_header(skb, hlen, off); if (unlikely(!th)) - goto out; + return NULL; thlen = th->doff * 4; if (thlen < sizeof(*th)) - goto out; + return NULL; hlen = off + thlen; if (!skb_gro_may_pull(skb, hlen)) { th = skb_gro_header_slow(skb, hlen, off); if (unlikely(!th)) - goto out; + return NULL; } skb_gro_pull(skb, thlen); - len = skb_gro_len(skb); - flags = tcp_flag_word(th); - - list_for_each_entry(p, head, list) { - if (!NAPI_GRO_CB(p)->same_flow) - continue; + return th; +} - th2 = tcp_hdr(p); +struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb, + struct tcphdr *th) +{ + unsigned int thlen = th->doff * 4; + struct sk_buff *pp = NULL; + struct sk_buff *p; + struct tcphdr *th2; + unsigned int len; + __be32 flags; + unsigned int mss = 1; + int flush = 1; + int i; - if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { - NAPI_GRO_CB(p)->same_flow = 0; - continue; - } + len = skb_gro_len(skb); + flags = tcp_flag_word(th); - goto found; - } - p = NULL; - goto out_check_final; + p = tcp_gro_lookup(head, th); + if (!p) + goto out_check_final; -found: /* Include the IP ID check below from the inner most IP hdr */ + th2 = tcp_hdr(p); flush = NAPI_GRO_CB(p)->flush; flush |= (__force int)(flags & TCP_FLAG_CWR); flush |= (__force int)((flags ^ tcp_flag_word(th2)) & @@ -267,6 +349,18 @@ found: flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); flush |= skb_cmp_decrypted(p, skb); + if (unlikely(NAPI_GRO_CB(p)->is_flist)) { + flush |= (__force int)(flags ^ tcp_flag_word(th2)); + flush |= skb->ip_summed != p->ip_summed; + flush |= skb->csum_level != p->csum_level; + flush |= NAPI_GRO_CB(p)->count >= 64; + + if (flush || skb_gro_receive_list(p, skb)) + mss = 1; + + goto out_check_final; + } + if (flush || skb_gro_receive(p, skb)) { mss = 1; goto out_check_final; @@ -288,7 +382,6 @@ out_check_final: if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) pp = p; -out: NAPI_GRO_CB(skb)->flush |= (flush != 0); return pp; @@ -314,18 +407,58 @@ void tcp_gro_complete(struct sk_buff *skb) } EXPORT_SYMBOL(tcp_gro_complete); +static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb, + struct tcphdr *th) +{ + const struct iphdr *iph; + struct sk_buff *p; + struct sock *sk; + struct net *net; + int iif, sdif; + + if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST))) + return; + + p = tcp_gro_lookup(head, th); + if (p) { + NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist; + return; + } + + inet_get_iif_sdif(skb, &iif, &sdif); + iph = skb_gro_network_header(skb); + net = dev_net(skb->dev); + sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, + iph->saddr, th->source, + iph->daddr, ntohs(th->dest), + iif, sdif); + NAPI_GRO_CB(skb)->is_flist = !sk; + if (sk) + sock_put(sk); +} + INDIRECT_CALLABLE_SCOPE struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb) { + struct tcphdr *th; + /* Don't bother verifying checksum if we're going to flush anyway. */ if (!NAPI_GRO_CB(skb)->flush && skb_gro_checksum_validate(skb, IPPROTO_TCP, - inet_gro_compute_pseudo)) { - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - } + inet_gro_compute_pseudo)) + goto flush; + + th = tcp_gro_pull_header(skb); + if (!th) + goto flush; - return tcp_gro_receive(head, skb); + tcp4_check_fraglist_gro(head, skb, th); + + return tcp_gro_receive(head, skb, th); + +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; } INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff) @@ -333,6 +466,15 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff) const struct iphdr *iph = ip_hdr(skb); struct tcphdr *th = tcp_hdr(skb); + if (unlikely(NAPI_GRO_CB(skb)->is_flist)) { + skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4; + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + + __skb_incr_checksum_unnecessary(skb); + + return 0; + } + th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, iph->daddr, 0); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 8721fe5beca2..f6d3c442e260 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -433,33 +433,6 @@ out: return segs; } -static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) -{ - if (unlikely(p->len + skb->len >= 65536)) - return -E2BIG; - - if (NAPI_GRO_CB(p)->last == p) - skb_shinfo(p)->frag_list = skb; - else - NAPI_GRO_CB(p)->last->next = skb; - - skb_pull(skb, skb_gro_offset(skb)); - - NAPI_GRO_CB(p)->last = skb; - NAPI_GRO_CB(p)->count++; - p->data_len += skb->len; - - /* sk ownership - if any - completely transferred to the aggregated packet */ - skb->destructor = NULL; - skb->sk = NULL; - p->truesize += skb->truesize; - p->len += skb->len; - - NAPI_GRO_CB(skb)->same_flow = 1; - - return 0; -} - #define UDP_GRO_CNT_MAX 64 static struct sk_buff *udp_gro_receive_segment(struct list_head *head, diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 4b07d1e6c952..d59f58cbd306 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -7,24 +7,67 @@ */ #include <linux/indirect_call_wrapper.h> #include <linux/skbuff.h> +#include <net/inet6_hashtables.h> #include <net/gro.h> #include <net/protocol.h> #include <net/tcp.h> #include <net/ip6_checksum.h> #include "ip6_offload.h" +static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb, + struct tcphdr *th) +{ +#if IS_ENABLED(CONFIG_IPV6) + const struct ipv6hdr *hdr; + struct sk_buff *p; + struct sock *sk; + struct net *net; + int iif, sdif; + + if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST))) + return; + + p = tcp_gro_lookup(head, th); + if (p) { + NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist; + return; + } + + inet6_get_iif_sdif(skb, &iif, &sdif); + hdr = skb_gro_network_header(skb); + net = dev_net(skb->dev); + sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, + &hdr->saddr, th->source, + &hdr->daddr, ntohs(th->dest), + iif, sdif); + NAPI_GRO_CB(skb)->is_flist = !sk; + if (sk) + sock_put(sk); +#endif /* IS_ENABLED(CONFIG_IPV6) */ +} + INDIRECT_CALLABLE_SCOPE struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb) { + struct tcphdr *th; + /* Don't bother verifying checksum if we're going to flush anyway. */ if (!NAPI_GRO_CB(skb)->flush && skb_gro_checksum_validate(skb, IPPROTO_TCP, - ip6_gro_compute_pseudo)) { - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - } + ip6_gro_compute_pseudo)) + goto flush; - return tcp_gro_receive(head, skb); + th = tcp_gro_pull_header(skb); + if (!th) + goto flush; + + tcp6_check_fraglist_gro(head, skb, th); + + return tcp_gro_receive(head, skb, th); + +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; } INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff) @@ -32,6 +75,15 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff) const struct ipv6hdr *iph = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb); + if (unlikely(NAPI_GRO_CB(skb)->is_flist)) { + skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV6; + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + + __skb_incr_checksum_unnecessary(skb); + + return 0; + } + th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr, &iph->daddr, 0); skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; @@ -40,6 +92,61 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff) return 0; } +static void __tcpv6_gso_segment_csum(struct sk_buff *seg, + __be16 *oldport, __be16 newport) +{ + struct tcphdr *th; + + if (*oldport == newport) + return; + + th = tcp_hdr(seg); + inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false); + *oldport = newport; +} + +static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs) +{ + const struct tcphdr *th; + const struct ipv6hdr *iph; + struct sk_buff *seg; + struct tcphdr *th2; + struct ipv6hdr *iph2; + + seg = segs; + th = tcp_hdr(seg); + iph = ipv6_hdr(seg); + th2 = tcp_hdr(seg->next); + iph2 = ipv6_hdr(seg->next); + + if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) && + ipv6_addr_equal(&iph->saddr, &iph2->saddr) && + ipv6_addr_equal(&iph->daddr, &iph2->daddr)) + return segs; + + while ((seg = seg->next)) { + th2 = tcp_hdr(seg); + iph2 = ipv6_hdr(seg); + + iph2->saddr = iph->saddr; + iph2->daddr = iph->daddr; + __tcpv6_gso_segment_csum(seg, &th2->source, th->source); + __tcpv6_gso_segment_csum(seg, &th2->dest, th->dest); + } + + return segs; +} + +static struct sk_buff *__tcp6_gso_segment_list(struct sk_buff *skb, + netdev_features_t features) +{ + skb = skb_segment_list(skb, features, skb_mac_header_len(skb)); + if (IS_ERR(skb)) + return skb; + + return __tcpv6_gso_segment_list_csum(skb); +} + static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@ -51,6 +158,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(*th))) return ERR_PTR(-EINVAL); + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) + return __tcp6_gso_segment_list(skb, features); + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { const struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb); |