diff options
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- | net/ipv4/tcp_output.c | 46 |
1 files changed, 27 insertions, 19 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f0723460753c..909f85aefd74 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -170,10 +170,10 @@ static void tcp_event_data_sent(struct tcp_sock *tp, tp->lsndtime = now; /* If it is a reply for ato after last received - * packet, enter pingpong mode. + * packet, increase pingpong count. */ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) - inet_csk_enter_pingpong_mode(sk); + inet_csk_inc_pingpong_cnt(sk); } /* Account for an ACK we sent. */ @@ -1076,7 +1076,8 @@ static void tcp_tasklet_func(struct tasklet_struct *t) #define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \ TCPF_WRITE_TIMER_DEFERRED | \ TCPF_DELACK_TIMER_DEFERRED | \ - TCPF_MTU_REDUCED_DEFERRED) + TCPF_MTU_REDUCED_DEFERRED | \ + TCPF_ACK_DEFERRED) /** * tcp_release_cb - tcp release_sock() callback * @sk: socket @@ -1100,16 +1101,6 @@ void tcp_release_cb(struct sock *sk) tcp_tsq_write(sk); __sock_put(sk); } - /* Here begins the tricky part : - * We are called from release_sock() with : - * 1) BH disabled - * 2) sk_lock.slock spinlock held - * 3) socket owned by us (sk->sk_lock.owned == 1) - * - * But following code is meant to be called from BH handlers, - * so we should keep BH disabled, but early release socket ownership - */ - sock_release_ownership(sk); if (flags & TCPF_WRITE_TIMER_DEFERRED) { tcp_write_timer_handler(sk); @@ -1123,6 +1114,8 @@ void tcp_release_cb(struct sock *sk) inet_csk(sk)->icsk_af_ops->mtu_reduced(sk); __sock_put(sk); } + if ((flags & TCPF_ACK_DEFERRED) && inet_csk_ack_scheduled(sk)) + tcp_send_ack(sk); } EXPORT_SYMBOL(tcp_release_cb); @@ -1207,7 +1200,7 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); if (sk->sk_pacing_status != SK_PACING_NONE) { - unsigned long rate = sk->sk_pacing_rate; + unsigned long rate = READ_ONCE(sk->sk_pacing_rate); /* Original sch_fq does not pace first 10 MSS * Note that tp->data_segs_out overflows after 2^32 packets, @@ -1331,7 +1324,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree; refcount_add(skb->truesize, &sk->sk_wmem_alloc); - skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm); + skb_set_dst_pending_confirm(skb, READ_ONCE(sk->sk_dst_pending_confirm)); /* Build TCP header and checksum it. */ th = (struct tcphdr *)skb->data; @@ -1979,7 +1972,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, unsigned long bytes; u32 r; - bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift); + bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift); r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log); if (r < BITS_PER_TYPE(sk->sk_gso_max_size)) @@ -2572,7 +2565,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, limit = max_t(unsigned long, 2 * skb->truesize, - sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift)); + READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift)); if (sk->sk_pacing_status == SK_PACING_NONE) limit = min_t(unsigned long, limit, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes)); @@ -2580,7 +2573,8 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, if (static_branch_unlikely(&tcp_tx_delay_enabled) && tcp_sk(sk)->tcp_tx_delay) { - u64 extra_bytes = (u64)sk->sk_pacing_rate * tcp_sk(sk)->tcp_tx_delay; + u64 extra_bytes = (u64)READ_ONCE(sk->sk_pacing_rate) * + tcp_sk(sk)->tcp_tx_delay; /* TSQ is based on skb truesize sum (sk_wmem_alloc), so we * approximate our needs assuming an ~100% skb->truesize overhead. @@ -3997,6 +3991,20 @@ int tcp_connect(struct sock *sk) } EXPORT_SYMBOL(tcp_connect); +u32 tcp_delack_max(const struct sock *sk) +{ + const struct dst_entry *dst = __sk_dst_get(sk); + u32 delack_max = inet_csk(sk)->icsk_delack_max; + + if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) { + u32 rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN); + u32 delack_from_rto_min = max_t(int, 1, rto_min - 1); + + delack_max = min_t(u32, delack_max, delack_from_rto_min); + } + return delack_max; +} + /* Send out a delayed ack, the caller does the policy checking * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check() * for details. @@ -4032,7 +4040,7 @@ void tcp_send_delayed_ack(struct sock *sk) ato = min(ato, max_ato); } - ato = min_t(u32, ato, inet_csk(sk)->icsk_delack_max); + ato = min_t(u32, ato, tcp_delack_max(sk)); /* Stay within the limit we were given */ timeout = jiffies + ato; |