diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/bpf_tcp_ca.c | 2 | ||||
-rw-r--r-- | net/ipv4/nexthop.c | 59 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 11 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 38 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 3 | ||||
-rw-r--r-- | net/ipv4/udp.c | 2 |
6 files changed, 75 insertions, 40 deletions
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 4406d796cc2f..39dcccf0f174 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -51,8 +51,6 @@ static bool is_unsupported(u32 member_offset) return false; } -extern struct btf *btf_vmlinux; - static bool bpf_tcp_ca_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index f95142e56da0..93f14d39fef6 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1152,41 +1152,64 @@ static bool ipv4_good_nh(const struct fib_nh *nh) return !!(state & NUD_VALID); } -static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash) +static bool nexthop_is_good_nh(const struct nexthop *nh) +{ + struct nh_info *nhi = rcu_dereference(nh->nh_info); + + switch (nhi->family) { + case AF_INET: + return ipv4_good_nh(&nhi->fib_nh); + case AF_INET6: + return ipv6_good_nh(&nhi->fib6_nh); + } + + return false; +} + +static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash) { - struct nexthop *rc = NULL; int i; - for (i = 0; i < nhg->num_nh; ++i) { + for (i = 0; i < nhg->num_nh; i++) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; - struct nh_info *nhi; if (hash > atomic_read(&nhge->hthr.upper_bound)) continue; - nhi = rcu_dereference(nhge->nh->nh_info); - if (nhi->fdb_nh) - return nhge->nh; + return nhge->nh; + } + + WARN_ON_ONCE(1); + return NULL; +} + +static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash) +{ + struct nexthop *rc = NULL; + int i; + + if (nhg->fdb_nh) + return nexthop_select_path_fdb(nhg, hash); + + for (i = 0; i < nhg->num_nh; ++i) { + struct nh_grp_entry *nhge = &nhg->nh_entries[i]; /* nexthops always check if it is good and does * not rely on a sysctl for this behavior */ - switch (nhi->family) { - case AF_INET: - if (ipv4_good_nh(&nhi->fib_nh)) - return nhge->nh; - break; - case AF_INET6: - if (ipv6_good_nh(&nhi->fib6_nh)) - return nhge->nh; - break; - } + if (!nexthop_is_good_nh(nhge->nh)) + continue; if (!rc) rc = nhge->nh; + + if (hash > atomic_read(&nhge->hthr.upper_bound)) + continue; + + return nhge->nh; } - return rc; + return rc ? : nhg->nh_entries[0].nh; } static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8ed52e1e3c99..aca5620cf3ba 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -457,6 +457,7 @@ void tcp_init_sock(struct sock *sk) WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1])); WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1])); + tcp_scaling_ratio_init(sk); set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); sk_sockets_allocated_inc(sk); @@ -1700,7 +1701,7 @@ EXPORT_SYMBOL(tcp_peek_len); /* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */ int tcp_set_rcvlowat(struct sock *sk, int val) { - int cap; + int space, cap; if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) cap = sk->sk_rcvbuf >> 1; @@ -1715,10 +1716,10 @@ int tcp_set_rcvlowat(struct sock *sk, int val) if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) return 0; - val <<= 1; - if (val > sk->sk_rcvbuf) { - WRITE_ONCE(sk->sk_rcvbuf, val); - tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val); + space = tcp_space_from_win(sk, val); + if (space > sk->sk_rcvbuf) { + WRITE_ONCE(sk->sk_rcvbuf, space); + tcp_sk(sk)->window_clamp = val; } return 0; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 57c8af1859c1..670c3dab24f2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -237,6 +237,16 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) */ len = skb_shinfo(skb)->gso_size ? : skb->len; if (len >= icsk->icsk_ack.rcv_mss) { + /* Note: divides are still a bit expensive. + * For the moment, only adjust scaling_ratio + * when we update icsk_ack.rcv_mss. + */ + if (unlikely(len != icsk->icsk_ack.rcv_mss)) { + u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE; + + do_div(val, skb->truesize); + tcp_sk(sk)->scaling_ratio = val ? val : 1; + } icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); /* Account for possibly-removed options */ @@ -287,7 +297,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks) icsk->icsk_ack.quick = quickacks; } -void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) +static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -295,7 +305,6 @@ void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) inet_csk_exit_pingpong_mode(sk); icsk->icsk_ack.ato = TCP_ATO_MIN; } -EXPORT_SYMBOL(tcp_enter_quickack_mode); /* Send ACKs quickly, if "quick" count is not exhausted * and the session is not interactive. @@ -727,8 +736,8 @@ void tcp_rcv_space_adjust(struct sock *sk) if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { - int rcvmem, rcvbuf; u64 rcvwin, grow; + int rcvbuf; /* minimal window to cope with packet losses, assuming * steady state. Add some cushion because of small variations. @@ -740,12 +749,7 @@ void tcp_rcv_space_adjust(struct sock *sk) do_div(grow, tp->rcvq_space.space); rcvwin += (grow << 1); - rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); - while (tcp_win_from_space(sk, rcvmem) < tp->advmss) - rcvmem += 128; - - do_div(rcvwin, tp->advmss); - rcvbuf = min_t(u64, rcvwin * rcvmem, + rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin), READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); if (rcvbuf > sk->sk_rcvbuf) { WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); @@ -4308,10 +4312,16 @@ static inline bool tcp_paws_discard(const struct sock *sk, * (borrowed from freebsd) */ -static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) +static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp, + u32 seq, u32 end_seq) { - return !before(end_seq, tp->rcv_wup) && - !after(seq, tp->rcv_nxt + tcp_receive_window(tp)); + if (before(end_seq, tp->rcv_wup)) + return SKB_DROP_REASON_TCP_OLD_SEQUENCE; + + if (after(seq, tp->rcv_nxt + tcp_receive_window(tp))) + return SKB_DROP_REASON_TCP_INVALID_SEQUENCE; + + return SKB_NOT_DROPPED_YET; } /* When we get a reset we do this. */ @@ -5734,7 +5744,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, } /* Step 1: check sequence number */ - if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { + reason = tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); + if (reason) { /* RFC793, page 37: "In all states except SYN-SENT, all reset * (RST) segments are validated by checking their SEQ-fields." * And page 69: "If an incoming segment is not acceptable, @@ -5751,7 +5762,6 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, } else if (tcp_reset_check(sk, skb)) { goto reset; } - SKB_DR_SET(reason, TCP_INVALID_SEQUENCE); goto discard; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a59cc4b83861..5b18a048f613 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -57,6 +57,7 @@ #include <linux/init.h> #include <linux/times.h> #include <linux/slab.h> +#include <linux/sched.h> #include <net/net_namespace.h> #include <net/icmp.h> @@ -2448,6 +2449,8 @@ static void *established_get_first(struct seq_file *seq) struct hlist_nulls_node *node; spinlock_t *lock = inet_ehash_lockp(hinfo, st->bucket); + cond_resched(); + /* Lockless fast path for the common case of empty buckets */ if (empty_bucket(hinfo, st)) continue; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index abfa860367aa..1ee9e56dc79a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1557,7 +1557,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) spin_unlock(&list->lock); if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk); + INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk); busylock_release(busy); return 0; |