summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--net/ipv4/tcp_output.c93
1 files changed, 48 insertions, 45 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 597dbd749f05..059b67af28b1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -45,6 +45,22 @@
#include <trace/events/tcp.h>
+/* Refresh clocks of a TCP socket,
+ * ensuring monotically increasing values.
+ */
+void tcp_mstamp_refresh(struct tcp_sock *tp)
+{
+ u64 val = tcp_clock_ns();
+
+ /* departure time for next data packet */
+ if (val > tp->tcp_wstamp_ns)
+ tp->tcp_wstamp_ns = val;
+
+ val = div_u64(val, NSEC_PER_USEC);
+ if (val > tp->tcp_mstamp)
+ tp->tcp_mstamp = val;
+}
+
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp);
@@ -179,21 +195,6 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
-
-u32 tcp_default_init_rwnd(u32 mss)
-{
- /* Initial receive window should be twice of TCP_INIT_CWND to
- * enable proper sending of new unsent data during fast recovery
- * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a
- * limit when mss is larger than 1460.
- */
- u32 init_rwnd = TCP_INIT_CWND * 2;
-
- if (mss > 1460)
- init_rwnd = max((1460 * init_rwnd) / mss, 2U);
- return init_rwnd;
-}
-
/* Determine a window scaling and initial window to offer.
* Based on the assumption that the given amount of space
* will be offered. Store the results in the tp structure.
@@ -228,7 +229,10 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
else
- (*rcv_wnd) = space;
+ (*rcv_wnd) = min_t(u32, space, U16_MAX);
+
+ if (init_rcv_wnd)
+ *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
(*rcv_wscale) = 0;
if (wscale_ok) {
@@ -241,11 +245,6 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
(*rcv_wscale)++;
}
}
-
- if (!init_rcv_wnd) /* Use default unless specified otherwise */
- init_rcv_wnd = tcp_default_init_rwnd(mss);
- *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
-
/* Set the clamp no higher than max representable value */
(*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
}
@@ -977,28 +976,34 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
+static void tcp_internal_pacing(struct sock *sk)
{
- u64 len_ns;
- u32 rate;
-
if (!tcp_needs_internal_pacing(sk))
return;
- rate = sk->sk_pacing_rate;
- if (!rate || rate == ~0U)
- return;
-
- len_ns = (u64)skb->len * NSEC_PER_SEC;
- do_div(len_ns, rate);
hrtimer_start(&tcp_sk(sk)->pacing_timer,
- ktime_add_ns(ktime_get(), len_ns),
+ ns_to_ktime(tcp_sk(sk)->tcp_wstamp_ns),
HRTIMER_MODE_ABS_PINNED_SOFT);
sock_hold(sk);
}
-static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
+static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb)
{
- skb->skb_mstamp = tp->tcp_mstamp;
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
+ if (sk->sk_pacing_status != SK_PACING_NONE) {
+ u32 rate = sk->sk_pacing_rate;
+
+ /* Original sch_fq does not pace first 10 MSS
+ * Note that tp->data_segs_out overflows after 2^32 packets,
+ * this is a minor annoyance.
+ */
+ if (rate != ~0U && rate && tp->data_segs_out >= 10) {
+ tp->tcp_wstamp_ns += div_u64((u64)skb->len * NSEC_PER_SEC, rate);
+
+ tcp_internal_pacing(sk);
+ }
+ }
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
}
@@ -1045,7 +1050,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
if (unlikely(!skb))
return -ENOBUFS;
}
- skb->skb_mstamp = tp->tcp_mstamp;
+ skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
inet = inet_sk(sk);
tcb = TCP_SKB_CB(skb);
@@ -1137,7 +1142,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
tcp_event_data_sent(tp, sk);
tp->data_segs_out += tcp_skb_pcount(skb);
tp->bytes_sent += skb->len - tcp_header_size;
- tcp_internal_pacing(sk, skb);
}
if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
@@ -1149,8 +1153,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
- /* Our usage of tstamp should remain private */
- skb->tstamp = 0;
+ /* Leave earliest departure time in skb->tstamp (skb->skb_mstamp_ns) */
/* Cleanup our debris for IP stacks */
memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
@@ -1163,7 +1166,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
err = net_xmit_eval(err);
}
if (!err && oskb) {
- tcp_update_skb_after_send(tp, oskb);
+ tcp_update_skb_after_send(sk, oskb);
tcp_rate_skb_sent(sk, oskb);
}
return err;
@@ -1966,7 +1969,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
head = tcp_rtx_queue_head(sk);
if (!head)
goto send_now;
- age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
+ age = tcp_stamp_us_delta(tp->tcp_mstamp, tcp_skb_timestamp_us(head));
/* If next ACK is likely to come too late (half srtt), do not defer */
if (age < (tp->srtt_us >> 4))
goto send_now;
@@ -2312,7 +2315,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
/* "skb_mstamp" is used as a start point for the retransmit timer */
- tcp_update_skb_after_send(tp, skb);
+ tcp_update_skb_after_send(sk, skb);
goto repair; /* Skip network transmission */
}
@@ -2887,7 +2890,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
} tcp_skb_tsorted_restore(skb);
if (!err) {
- tcp_update_skb_after_send(tp, skb);
+ tcp_update_skb_after_send(sk, skb);
tcp_rate_skb_sent(sk, skb);
}
} else {
@@ -3205,10 +3208,10 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
memset(&opts, 0, sizeof(opts));
#ifdef CONFIG_SYN_COOKIES
if (unlikely(req->cookie_ts))
- skb->skb_mstamp = cookie_init_timestamp(req);
+ skb->skb_mstamp_ns = cookie_init_timestamp(req);
else
#endif
- skb->skb_mstamp = tcp_clock_us();
+ skb->skb_mstamp_ns = tcp_clock_ns();
#ifdef CONFIG_TCP_MD5SIG
rcu_read_lock();
@@ -3424,7 +3427,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
- syn->skb_mstamp = syn_data->skb_mstamp;
+ syn->skb_mstamp_ns = syn_data->skb_mstamp_ns;
/* Now full SYN+DATA was cloned and sent (or not),
* remove the SYN from the original skb (syn_data)