Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r--  net/ipv4/tcp_input.c  132
1 file changed, 75 insertions(+), 57 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 31f2765ef851..9739962bfb3f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -442,7 +442,8 @@ void tcp_init_buffer_space(struct sock *sk)
tcp_sndbuf_expand(sk);
tp->rcvq_space.space = tp->rcv_wnd;
- tp->rcvq_space.time = tcp_time_stamp;
+ skb_mstamp_get(&tp->tcp_mstamp);
+ tp->rcvq_space.time = tp->tcp_mstamp;
tp->rcvq_space.seq = tp->copied_seq;
maxwin = tcp_full_space(sk);
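
The change above sets up the recurring theme of this diff: the receive path now samples the clock once per incoming packet into tp->tcp_mstamp (a struct skb_mstamp holding a microsecond timestamp) and lets every consumer reuse that cached value instead of re-reading jiffies. A minimal userspace sketch of the pattern, using hypothetical names (struct conn, on_packet, clock_now_us) rather than kernel API:

#include <stdio.h>
#include <stdint.h>
#include <time.h>

struct conn { uint64_t now_us; };               /* stand-in for tp->tcp_mstamp */

static uint64_t clock_now_us(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000u + (uint64_t)ts.tv_nsec / 1000u;
}

static void on_packet(struct conn *c)
{
        c->now_us = clock_now_us();             /* one clock read per packet */
}

static uint64_t rtt_sample_us(const struct conn *c, uint64_t xmit_us)
{
        return c->now_us - xmit_us;             /* consumers reuse the cached stamp */
}

int main(void)
{
        struct conn c;

        on_packet(&c);
        printf("sample rtt: %llu us\n",
               (unsigned long long)rtt_sample_us(&c, c.now_us - 1234));
        return 0;
}
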
@@ -518,7 +519,7 @@ EXPORT_SYMBOL(tcp_initialize_rcv_mss);
*/
static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
{
- u32 new_sample = tp->rcv_rtt_est.rtt;
+ u32 new_sample = tp->rcv_rtt_est.rtt_us;
long m = sample;
if (m == 0)
@@ -548,21 +549,23 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
new_sample = m << 3;
}
- if (tp->rcv_rtt_est.rtt != new_sample)
- tp->rcv_rtt_est.rtt = new_sample;
+ tp->rcv_rtt_est.rtt_us = new_sample;
}
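
The estimator above keeps rtt_us in 1/8-microsecond fixed point (a fresh measurement seeds it with m << 3), which is why code later in this diff reads it back as rtt_us >> 3. A simplified userspace analogue of the !win_dep branch, under that fixed-point assumption:

#include <stdio.h>
#include <stdint.h>

static uint32_t rtt_est_8x;                     /* roughly 8 * smoothed RTT in us */

static void rtt_update(uint32_t sample_us)
{
        long m;

        if (rtt_est_8x == 0) {
                rtt_est_8x = sample_us << 3;    /* no previous measure */
                return;
        }
        m = (long)sample_us - (long)(rtt_est_8x >> 3);
        rtt_est_8x += m;                        /* EWMA with gain 1/8 */
}

int main(void)
{
        rtt_update(4000);
        rtt_update(8000);
        printf("smoothed rtt: %u us\n", (unsigned)(rtt_est_8x >> 3));  /* 4500 */
        return 0;
}
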
static inline void tcp_rcv_rtt_measure(struct tcp_sock *tp)
{
- if (tp->rcv_rtt_est.time == 0)
+ u32 delta_us;
+
+ if (tp->rcv_rtt_est.time.v64 == 0)
goto new_measure;
if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
return;
- tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rcv_rtt_est.time, 1);
+ delta_us = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcv_rtt_est.time);
+ tcp_rcv_rtt_update(tp, delta_us, 1);
new_measure:
tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
- tp->rcv_rtt_est.time = tcp_time_stamp;
+ tp->rcv_rtt_est.time = tp->tcp_mstamp;
}
static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
@@ -572,7 +575,10 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
if (tp->rx_opt.rcv_tsecr &&
(TCP_SKB_CB(skb)->end_seq -
TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss))
- tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0);
+ tcp_rcv_rtt_update(tp,
+ jiffies_to_usecs(tcp_time_stamp -
+ tp->rx_opt.rcv_tsecr),
+ 0);
}
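
The timestamp-echo path still produces its RTT sample in jiffies, so it is converted to microseconds before reaching the estimator. A standalone illustration of the arithmetic, assuming HZ=1000 (the kernel's HZ is configuration-dependent):

#include <stdio.h>
#include <stdint.h>

#define HZ 1000u                                /* assumed; real kernels vary */

static uint32_t jiffies_to_usecs(uint32_t j)    /* stand-in for the kernel helper */
{
        return j * (1000000u / HZ);
}

int main(void)
{
        uint32_t tcp_time_stamp = 105, rcv_tsecr = 102;     /* example values */
        uint32_t rtt_us = jiffies_to_usecs(tcp_time_stamp - rcv_tsecr);

        printf("%u jiffies -> %u us\n",
               (unsigned)(tcp_time_stamp - rcv_tsecr), (unsigned)rtt_us);  /* 3 -> 3000 */
        return 0;
}
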
/*
@@ -585,8 +591,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
int time;
int copied;
- time = tcp_time_stamp - tp->rcvq_space.time;
- if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
+ time = skb_mstamp_us_delta(&tp->tcp_mstamp, &tp->rcvq_space.time);
+ if (time < (tp->rcv_rtt_est.rtt_us >> 3) || tp->rcv_rtt_est.rtt_us == 0)
return;
/* Number of bytes copied to user in last RTT */
@@ -642,7 +648,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
new_measure:
tp->rcvq_space.seq = tp->copied_seq;
- tp->rcvq_space.time = tcp_time_stamp;
+ tp->rcvq_space.time = tp->tcp_mstamp;
}
/* There is something which you must keep in mind when you analyze the
@@ -1131,7 +1137,6 @@ struct tcp_sacktag_state {
*/
struct skb_mstamp first_sackt;
struct skb_mstamp last_sackt;
- struct skb_mstamp ack_time; /* Timestamp when the S/ACK was received */
struct rate_sample *rate;
int flag;
};
@@ -1214,8 +1219,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
return sacked;
if (!(sacked & TCPCB_SACKED_ACKED)) {
- tcp_rack_advance(tp, sacked, end_seq,
- xmit_time, &state->ack_time);
+ tcp_rack_advance(tp, sacked, end_seq, xmit_time);
if (sacked & TCPCB_SACKED_RETRANS) {
/* If the segment is not tagged as lost,
@@ -1935,6 +1939,7 @@ void tcp_enter_loss(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
struct sk_buff *skb;
+ bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
bool is_reneg; /* is receiver reneging on SACKs? */
bool mark_lost;
@@ -1994,15 +1999,18 @@ void tcp_enter_loss(struct sock *sk)
tp->high_seq = tp->snd_nxt;
tcp_ecn_queue_cwr(tp);
- /* F-RTO RFC5682 sec 3.1 step 1 mandates to disable F-RTO
- * if a previous recovery is underway, otherwise it may incorrectly
- * call a timeout spurious if some previously retransmitted packets
- * are s/acked (sec 3.2). We do not apply that restriction since
- * retransmitted skbs are permanently tagged with TCPCB_EVER_RETRANS
- * so FLAG_ORIG_SACK_ACKED is always correct. But we do disable F-RTO
- * on PMTU discovery to avoid sending new data.
+ /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
+ * loss recovery is underway except recurring timeout(s) on
+ * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
+ *
+ * In theory F-RTO can be used repeatedly during loss recovery.
+ * In practice this interacts badly with broken middle-boxes that
+ * falsely raise the receive window, which results in repeated
+ * timeouts and stop-and-go behavior.
*/
- tp->frto = sysctl_tcp_frto && !inet_csk(sk)->icsk_mtup.probe_size;
+ tp->frto = sysctl_tcp_frto &&
+ (new_recovery || icsk->icsk_retransmits) &&
+ !inet_csk(sk)->icsk_mtup.probe_size;
}
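
The new gate arms F-RTO only on a fresh timeout, or on recurring timeouts of the same SND.UNA, and never while a path MTU probe is in flight. A small sketch restating that condition with illustrative, non-kernel names:

#include <stdbool.h>
#include <stdio.h>

static bool frto_armed(bool sysctl_frto, bool new_recovery,
                       int rto_retransmits, bool mtu_probing)
{
        return sysctl_frto &&
               (new_recovery || rto_retransmits > 0) &&
               !mtu_probing;
}

int main(void)
{
        /* RTO interrupts an ongoing fast recovery, first timeout: off */
        printf("%d\n", frto_armed(true, false, 0, false));  /* 0 */
        /* recurring RTO on the same SND.UNA: on */
        printf("%d\n", frto_armed(true, false, 2, false));  /* 1 */
        /* fresh timeout but an MTU probe is outstanding: off */
        printf("%d\n", frto_armed(true, true, 0, true));    /* 0 */
        return 0;
}
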
/* If ACK arrived pointing to a remembered SACK, it means that our
@@ -2756,8 +2764,7 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked)
return false;
}
-static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag,
- const struct skb_mstamp *ack_time)
+static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2765,7 +2772,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag,
if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
u32 prior_retrans = tp->retrans_out;
- tcp_rack_mark_lost(sk, ack_time);
+ tcp_rack_mark_lost(sk);
if (prior_retrans > tp->retrans_out)
*ack_flag |= FLAG_LOST_RETRANS;
}
@@ -2784,8 +2791,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag,
* tcp_xmit_retransmit_queue().
*/
static void tcp_fastretrans_alert(struct sock *sk, const int acked,
- bool is_dupack, int *ack_flag, int *rexmit,
- const struct skb_mstamp *ack_time)
+ bool is_dupack, int *ack_flag, int *rexmit)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -2853,11 +2859,11 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
tcp_try_keep_open(sk);
return;
}
- tcp_rack_identify_loss(sk, ack_flag, ack_time);
+ tcp_rack_identify_loss(sk, ack_flag);
break;
case TCP_CA_Loss:
tcp_process_loss(sk, flag, is_dupack, rexmit);
- tcp_rack_identify_loss(sk, ack_flag, ack_time);
+ tcp_rack_identify_loss(sk, ack_flag);
if (!(icsk->icsk_ca_state == TCP_CA_Open ||
(*ack_flag & FLAG_LOST_RETRANS)))
return;
@@ -2873,7 +2879,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
if (icsk->icsk_ca_state <= TCP_CA_Disorder)
tcp_try_undo_dsack(sk);
- tcp_rack_identify_loss(sk, ack_flag, ack_time);
+ tcp_rack_identify_loss(sk, ack_flag);
if (!tcp_time_to_recover(sk, flag)) {
tcp_try_to_open(sk, flag);
return;
@@ -3055,8 +3061,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct skb_mstamp first_ackt, last_ackt;
- struct skb_mstamp *now = &sack->ack_time;
struct tcp_sock *tp = tcp_sk(sk);
+ struct skb_mstamp *now = &tp->tcp_mstamp;
u32 prior_sacked = tp->sacked_out;
u32 reord = tp->packets_out;
bool fully_acked = true;
@@ -3116,8 +3122,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->delivered += acked_pcount;
if (!tcp_skb_spurious_retrans(tp, skb))
tcp_rack_advance(tp, sacked, scb->end_seq,
- &skb->skb_mstamp,
- &sack->ack_time);
+ &skb->skb_mstamp);
}
if (sacked & TCPCB_LOST)
tp->lost_out -= acked_pcount;
@@ -3572,8 +3577,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (after(ack, tp->snd_nxt))
goto invalid_ack;
- skb_mstamp_get(&sack_state.ack_time);
-
if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
@@ -3643,8 +3646,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
- &sack_state.ack_time);
+ tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
}
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
@@ -3656,8 +3658,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_schedule_loss_probe(sk);
delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
lost = tp->lost - lost; /* freshly marked lost */
- tcp_rate_gen(sk, delivered, lost, &sack_state.ack_time,
- sack_state.rate);
+ tcp_rate_gen(sk, delivered, lost, sack_state.rate);
tcp_cong_control(sk, ack, delivered, flag, sack_state.rate);
tcp_xmit_recovery(sk, rexmit);
return 1;
@@ -3665,8 +3666,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK)
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
- &sack_state.ack_time);
+ tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
@@ -3687,11 +3687,9 @@ old_ack:
* If data was DSACKed, see if we can undo a cwnd reduction.
*/
if (TCP_SKB_CB(skb)->sacked) {
- skb_mstamp_get(&sack_state.ack_time);
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
- tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit,
- &sack_state.ack_time);
+ tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
tcp_xmit_recovery(sk, rexmit);
}
@@ -4004,10 +4002,10 @@ void tcp_reset(struct sock *sk)
/* This barrier is coupled with smp_rmb() in tcp_poll() */
smp_wmb();
+ tcp_done(sk);
+
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_error_report(sk);
-
- tcp_done(sk);
}
/*
@@ -5296,8 +5294,16 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
if (rst_seq_match)
tcp_reset(sk);
- else
+ else {
+ /* Disable TFO if RST is out-of-order
+ * and no data has been received
+ * for current active TFO socket
+ */
+ if (tp->syn_fastopen && !tp->data_segs_in &&
+ sk->sk_state == TCP_ESTABLISHED)
+ tcp_fastopen_active_disable(sk);
tcp_send_challenge_ack(sk, skb);
+ }
goto discard;
}
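
Here, and again in a later hunk for an out-of-order FIN arriving after close(), the patch pauses active Fast Open (tcp_fastopen_active_disable()) when it sees behavior typical of a middlebox that drops TFO SYNs. A sketch of the two triggers, with illustrative field names:

#include <stdbool.h>
#include <stdio.h>

struct tfo_view {
        bool syn_fastopen;         /* this connection attempted active TFO */
        unsigned data_segs_in;     /* data segments received so far */
        bool established;          /* socket is in TCP_ESTABLISHED */
};

/* trigger 1: out-of-sequence RST before any data on an active TFO socket */
static bool pause_on_ooo_rst(const struct tfo_view *v)
{
        return v->syn_fastopen && v->data_segs_in == 0 && v->established;
}

/* trigger 2: out-of-order FIN after close() on an active TFO socket */
static bool pause_on_ooo_fin_after_close(bool syn_fastopen, bool fin_set)
{
        return syn_fastopen && fin_set;
}

int main(void)
{
        struct tfo_view v = { .syn_fastopen = true, .data_segs_in = 0,
                              .established = true };

        printf("%d %d\n", pause_on_ooo_rst(&v),
               pause_on_ooo_fin_after_close(true, true));   /* 1 1 */
        return 0;
}
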
@@ -5350,6 +5356,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
{
struct tcp_sock *tp = tcp_sk(sk);
+ skb_mstamp_get(&tp->tcp_mstamp);
if (unlikely(!sk->sk_rx_dst))
inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
/*
@@ -5576,10 +5583,6 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
else
tp->pred_flags = 0;
- if (!sock_flag(sk, SOCK_DEAD)) {
- sk->sk_state_change(sk);
- sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
- }
}
static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
@@ -5648,6 +5651,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_cookie foc = { .len = -1 };
int saved_clamp = tp->rx_opt.mss_clamp;
+ bool fastopen_fail;
tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
@@ -5751,10 +5755,15 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_finish_connect(sk, skb);
- if ((tp->syn_fastopen || tp->syn_data) &&
- tcp_rcv_fastopen_synack(sk, skb, &foc))
- return -1;
+ fastopen_fail = (tp->syn_fastopen || tp->syn_data) &&
+ tcp_rcv_fastopen_synack(sk, skb, &foc);
+ if (!sock_flag(sk, SOCK_DEAD)) {
+ sk->sk_state_change(sk);
+ sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
+ }
+ if (fastopen_fail)
+ return -1;
if (sk->sk_write_pending ||
icsk->icsk_accept_queue.rskq_defer_accept ||
icsk->icsk_ack.pingpong) {
@@ -5908,6 +5917,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_SYN_SENT:
tp->rx_opt.saw_tstamp = 0;
+ skb_mstamp_get(&tp->tcp_mstamp);
queued = tcp_rcv_synsent_state_process(sk, skb, th);
if (queued >= 0)
return queued;
@@ -5919,6 +5929,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
return 0;
}
+ skb_mstamp_get(&tp->tcp_mstamp);
tp->rx_opt.saw_tstamp = 0;
req = tp->fastopen_rsk;
if (req) {
@@ -6038,9 +6049,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
break;
}
- if (tp->linger2 < 0 ||
- (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
- after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
+ if (tp->linger2 < 0) {
+ tcp_done(sk);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
+ return 1;
+ }
+ if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
+ after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
+ /* Receive out of order FIN after close() */
+ if (tp->syn_fastopen && th->fin)
+ tcp_fastopen_active_disable(sk);
tcp_done(sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
return 1;