summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/bpf_tcp_ca.c2
-rw-r--r--net/ipv4/nexthop.c59
-rw-r--r--net/ipv4/tcp.c11
-rw-r--r--net/ipv4/tcp_input.c38
-rw-r--r--net/ipv4/tcp_ipv4.c3
-rw-r--r--net/ipv4/udp.c2
6 files changed, 75 insertions, 40 deletions
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 4406d796cc2f..39dcccf0f174 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -51,8 +51,6 @@ static bool is_unsupported(u32 member_offset)
return false;
}
-extern struct btf *btf_vmlinux;
-
static bool bpf_tcp_ca_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index f95142e56da0..93f14d39fef6 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1152,41 +1152,64 @@ static bool ipv4_good_nh(const struct fib_nh *nh)
return !!(state & NUD_VALID);
}
-static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
+static bool nexthop_is_good_nh(const struct nexthop *nh)
+{
+ struct nh_info *nhi = rcu_dereference(nh->nh_info);
+
+ switch (nhi->family) {
+ case AF_INET:
+ return ipv4_good_nh(&nhi->fib_nh);
+ case AF_INET6:
+ return ipv6_good_nh(&nhi->fib6_nh);
+ }
+
+ return false;
+}
+
+static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
{
- struct nexthop *rc = NULL;
int i;
- for (i = 0; i < nhg->num_nh; ++i) {
+ for (i = 0; i < nhg->num_nh; i++) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
- struct nh_info *nhi;
if (hash > atomic_read(&nhge->hthr.upper_bound))
continue;
- nhi = rcu_dereference(nhge->nh->nh_info);
- if (nhi->fdb_nh)
- return nhge->nh;
+ return nhge->nh;
+ }
+
+ WARN_ON_ONCE(1);
+ return NULL;
+}
+
+static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
+{
+ struct nexthop *rc = NULL;
+ int i;
+
+ if (nhg->fdb_nh)
+ return nexthop_select_path_fdb(nhg, hash);
+
+ for (i = 0; i < nhg->num_nh; ++i) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
/* nexthops always check if it is good and does
* not rely on a sysctl for this behavior
*/
- switch (nhi->family) {
- case AF_INET:
- if (ipv4_good_nh(&nhi->fib_nh))
- return nhge->nh;
- break;
- case AF_INET6:
- if (ipv6_good_nh(&nhi->fib6_nh))
- return nhge->nh;
- break;
- }
+ if (!nexthop_is_good_nh(nhge->nh))
+ continue;
if (!rc)
rc = nhge->nh;
+
+ if (hash > atomic_read(&nhge->hthr.upper_bound))
+ continue;
+
+ return nhge->nh;
}
- return rc;
+ return rc ? : nhg->nh_entries[0].nh;
}
static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8ed52e1e3c99..aca5620cf3ba 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -457,6 +457,7 @@ void tcp_init_sock(struct sock *sk)
WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
+ tcp_scaling_ratio_init(sk);
set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
sk_sockets_allocated_inc(sk);
@@ -1700,7 +1701,7 @@ EXPORT_SYMBOL(tcp_peek_len);
/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
int tcp_set_rcvlowat(struct sock *sk, int val)
{
- int cap;
+ int space, cap;
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
cap = sk->sk_rcvbuf >> 1;
@@ -1715,10 +1716,10 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
return 0;
- val <<= 1;
- if (val > sk->sk_rcvbuf) {
- WRITE_ONCE(sk->sk_rcvbuf, val);
- tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
+ space = tcp_space_from_win(sk, val);
+ if (space > sk->sk_rcvbuf) {
+ WRITE_ONCE(sk->sk_rcvbuf, space);
+ tcp_sk(sk)->window_clamp = val;
}
return 0;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 57c8af1859c1..670c3dab24f2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -237,6 +237,16 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
*/
len = skb_shinfo(skb)->gso_size ? : skb->len;
if (len >= icsk->icsk_ack.rcv_mss) {
+ /* Note: divides are still a bit expensive.
+ * For the moment, only adjust scaling_ratio
+ * when we update icsk_ack.rcv_mss.
+ */
+ if (unlikely(len != icsk->icsk_ack.rcv_mss)) {
+ u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE;
+
+ do_div(val, skb->truesize);
+ tcp_sk(sk)->scaling_ratio = val ? val : 1;
+ }
icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
tcp_sk(sk)->advmss);
/* Account for possibly-removed options */
@@ -287,7 +297,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
icsk->icsk_ack.quick = quickacks;
}
-void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -295,7 +305,6 @@ void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
inet_csk_exit_pingpong_mode(sk);
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
-EXPORT_SYMBOL(tcp_enter_quickack_mode);
/* Send ACKs quickly, if "quick" count is not exhausted
* and the session is not interactive.
@@ -727,8 +736,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
- int rcvmem, rcvbuf;
u64 rcvwin, grow;
+ int rcvbuf;
/* minimal window to cope with packet losses, assuming
* steady state. Add some cushion because of small variations.
@@ -740,12 +749,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
do_div(grow, tp->rcvq_space.space);
rcvwin += (grow << 1);
- rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
- while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
- rcvmem += 128;
-
- do_div(rcvwin, tp->advmss);
- rcvbuf = min_t(u64, rcvwin * rcvmem,
+ rcvbuf = min_t(u64, tcp_space_from_win(sk, rcvwin),
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
@@ -4308,10 +4312,16 @@ static inline bool tcp_paws_discard(const struct sock *sk,
* (borrowed from freebsd)
*/
-static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
+static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp,
+ u32 seq, u32 end_seq)
{
- return !before(end_seq, tp->rcv_wup) &&
- !after(seq, tp->rcv_nxt + tcp_receive_window(tp));
+ if (before(end_seq, tp->rcv_wup))
+ return SKB_DROP_REASON_TCP_OLD_SEQUENCE;
+
+ if (after(seq, tp->rcv_nxt + tcp_receive_window(tp)))
+ return SKB_DROP_REASON_TCP_INVALID_SEQUENCE;
+
+ return SKB_NOT_DROPPED_YET;
}
/* When we get a reset we do this. */
@@ -5734,7 +5744,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
}
/* Step 1: check sequence number */
- if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
+ reason = tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+ if (reason) {
/* RFC793, page 37: "In all states except SYN-SENT, all reset
* (RST) segments are validated by checking their SEQ-fields."
* And page 69: "If an incoming segment is not acceptable,
@@ -5751,7 +5762,6 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
} else if (tcp_reset_check(sk, skb)) {
goto reset;
}
- SKB_DR_SET(reason, TCP_INVALID_SEQUENCE);
goto discard;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a59cc4b83861..5b18a048f613 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -57,6 +57,7 @@
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>
+#include <linux/sched.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
@@ -2448,6 +2449,8 @@ static void *established_get_first(struct seq_file *seq)
struct hlist_nulls_node *node;
spinlock_t *lock = inet_ehash_lockp(hinfo, st->bucket);
+ cond_resched();
+
/* Lockless fast path for the common case of empty buckets */
if (empty_bucket(hinfo, st))
continue;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index abfa860367aa..1ee9e56dc79a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1557,7 +1557,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
spin_unlock(&list->lock);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk);
+ INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk);
busylock_release(busy);
return 0;