summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYuchung Cheng <ycheng@google.com>2018-07-18 23:56:35 +0300
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2018-07-28 08:43:18 +0300
commit8c11f488b6f415a9c7d91cc09fec939b256c05be (patch)
tree8a758ee9e360f372be234ef87509877957b852a6
parente76a28ca1f536df0c7f6c5631e76a1eeda8f8cef (diff)
downloadlinux-8c11f488b6f415a9c7d91cc09fec939b256c05be.tar.xz
tcp: do not cancel delay-AcK on DCTCP special ACK
[ Upstream commit 27cde44a259c380a3c09066fc4b42de7dde9b1ad ] Currently when a DCTCP receiver delays an ACK and receive a data packet with a different CE mark from the previous one's, it sends two immediate ACKs acking previous and latest sequences respectly (for ECN accounting). Previously sending the first ACK may mark off the delayed ACK timer (tcp_event_ack_sent). This may subsequently prevent sending the second ACK to acknowledge the latest sequence (tcp_ack_snd_check). The culprit is that tcp_send_ack() assumes it always acknowleges the latest sequence, which is not true for the first special ACK. The fix is to not make the assumption in tcp_send_ack and check the actual ack sequence before cancelling the delayed ACK. Further it's safer to pass the ack sequence number as a local variable into tcp_send_ack routine, instead of intercepting tp->rcv_nxt to avoid future bugs like this. Reported-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Yuchung Cheng <ycheng@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--include/net/tcp.h1
-rw-r--r--net/ipv4/tcp_dctcp.c34
-rw-r--r--net/ipv4/tcp_output.c11
3 files changed, 13 insertions, 33 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 15564a2cc28c..521490fd8928 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -530,6 +530,7 @@ int tcp_send_synack(struct sock *);
bool tcp_syn_flood_action(struct sock *sk, const struct sk_buff *skb,
const char *proto);
void tcp_push_one(struct sock *, unsigned int mss_now);
+void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
void tcp_send_ack(struct sock *sk);
void tcp_send_delayed_ack(struct sock *sk);
void tcp_send_loss_probe(struct sock *sk);
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 33e74cb51b89..faab1c0e4c26 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -132,21 +132,8 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
* ACK has not sent yet.
*/
if (!ca->ce_state &&
- inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
- u32 tmp_rcv_nxt;
-
- /* Save current rcv_nxt. */
- tmp_rcv_nxt = tp->rcv_nxt;
-
- /* Generate previous ack with CE=0. */
- tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
- tp->rcv_nxt = ca->prior_rcv_nxt;
-
- tcp_send_ack(sk);
-
- /* Recover current rcv_nxt. */
- tp->rcv_nxt = tmp_rcv_nxt;
- }
+ inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+ __tcp_send_ack(sk, ca->prior_rcv_nxt);
ca->prior_rcv_nxt = tp->rcv_nxt;
ca->ce_state = 1;
@@ -163,21 +150,8 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
* ACK has not sent yet.
*/
if (ca->ce_state &&
- inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) {
- u32 tmp_rcv_nxt;
-
- /* Save current rcv_nxt. */
- tmp_rcv_nxt = tp->rcv_nxt;
-
- /* Generate previous ack with CE=1. */
- tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
- tp->rcv_nxt = ca->prior_rcv_nxt;
-
- tcp_send_ack(sk);
-
- /* Recover current rcv_nxt. */
- tp->rcv_nxt = tmp_rcv_nxt;
- }
+ inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+ __tcp_send_ack(sk, ca->prior_rcv_nxt);
ca->prior_rcv_nxt = tp->rcv_nxt;
ca->ce_state = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 371ea9e76bd1..a3c701e079fd 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -183,8 +183,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
}
/* Account for an ACK we sent. */
-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
+static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
+ u32 rcv_nxt)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (unlikely(rcv_nxt != tp->rcv_nxt))
+ return; /* Special ACK sent by DCTCP to reflect ECN */
tcp_dec_quickack_mode(sk, pkts);
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
@@ -990,7 +995,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
icsk->icsk_af_ops->send_check(sk, skb);
if (likely(tcb->tcp_flags & TCPHDR_ACK))
- tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
+ tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
if (skb->len != tcp_header_size)
tcp_event_data_sent(tp, sk);
@@ -3258,12 +3263,12 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
skb_mstamp_get(&buff->skb_mstamp);
__tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC), rcv_nxt);
}
+EXPORT_SYMBOL_GPL(__tcp_send_ack);
void tcp_send_ack(struct sock *sk)
{
__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
}
-EXPORT_SYMBOL_GPL(tcp_send_ack);
/* This routine sends a packet with an out of date sequence
* number. It assumes the other end will try to ack it.