Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/Kconfig                     |   1
-rw-r--r--  net/ipv4/af_inet.c                   |  11
-rw-r--r--  net/ipv4/arp.c                       |   7
-rw-r--r--  net/ipv4/datagram.c                  |   7
-rw-r--r--  net/ipv4/devinet.c                   |   2
-rw-r--r--  net/ipv4/esp4.c                      |   6
-rw-r--r--  net/ipv4/fib_frontend.c              |   4
-rw-r--r--  net/ipv4/fib_rules.c                 |   2
-rw-r--r--  net/ipv4/fib_semantics.c             |   4
-rw-r--r--  net/ipv4/fib_trie.c                  |  12
-rw-r--r--  net/ipv4/fou.c                       |   1
-rw-r--r--  net/ipv4/icmp.c                      |  77
-rw-r--r--  net/ipv4/igmp.c                      |   4
-rw-r--r--  net/ipv4/inet_connection_sock.c      |  12
-rw-r--r--  net/ipv4/inet_diag.c                 |   5
-rw-r--r--  net/ipv4/inet_fragment.c             |   2
-rw-r--r--  net/ipv4/inet_hashtables.c           | 136
-rw-r--r--  net/ipv4/ip_forward.c                |  13
-rw-r--r--  net/ipv4/ip_gre.c                    |  50
-rw-r--r--  net/ipv4/ip_input.c                  |   1
-rw-r--r--  net/ipv4/ipmr.c                      |   2
-rw-r--r--  net/ipv4/netfilter/nf_reject_ipv4.c  |  10
-rw-r--r--  net/ipv4/netfilter/nft_fib_ipv4.c    |   4
-rw-r--r--  net/ipv4/ping.c                      |  40
-rw-r--r--  net/ipv4/raw.c                       |   6
-rw-r--r--  net/ipv4/route.c                     |  51
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c           |  16
-rw-r--r--  net/ipv4/tcp.c                       |  69
-rw-r--r--  net/ipv4/tcp_bbr.c                   |  22
-rw-r--r--  net/ipv4/tcp_bic.c                   |  14
-rw-r--r--  net/ipv4/tcp_bpf.c                   |  15
-rw-r--r--  net/ipv4/tcp_cdg.c                   |  30
-rw-r--r--  net/ipv4/tcp_cong.c                  |  30
-rw-r--r--  net/ipv4/tcp_cubic.c                 |  26
-rw-r--r--  net/ipv4/tcp_dctcp.c                 |  11
-rw-r--r--  net/ipv4/tcp_highspeed.c             |  18
-rw-r--r--  net/ipv4/tcp_htcp.c                  |  10
-rw-r--r--  net/ipv4/tcp_hybla.c                 |  18
-rw-r--r--  net/ipv4/tcp_illinois.c              |  12
-rw-r--r--  net/ipv4/tcp_input.c                 | 167
-rw-r--r--  net/ipv4/tcp_ipv4.c                  |  30
-rw-r--r--  net/ipv4/tcp_lp.c                    |   6
-rw-r--r--  net/ipv4/tcp_metrics.c               |  12
-rw-r--r--  net/ipv4/tcp_nv.c                    |  24
-rw-r--r--  net/ipv4/tcp_output.c                |  46
-rw-r--r--  net/ipv4/tcp_rate.c                  |   2
-rw-r--r--  net/ipv4/tcp_recovery.c              |  15
-rw-r--r--  net/ipv4/tcp_scalable.c              |   4
-rw-r--r--  net/ipv4/tcp_vegas.c                 |  21
-rw-r--r--  net/ipv4/tcp_veno.c                  |  24
-rw-r--r--  net/ipv4/tcp_westwood.c              |   3
-rw-r--r--  net/ipv4/tcp_yeah.c                  |  30
-rw-r--r--  net/ipv4/udp.c                       |  16
-rw-r--r--  net/ipv4/udp_bpf.c                   |  17
-rw-r--r--  net/ipv4/udp_impl.h                  |   4
55 files changed, 573 insertions, 609 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 87983e70f03f..e983bb0c5012 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -321,7 +321,6 @@ config NET_UDP_TUNNEL
config NET_FOU
tristate "IP: Foo (IP protocols) over UDP"
- select XFRM
select NET_UDP_TUNNEL
help
Foo over UDP allows any IP protocol to be directly encapsulated
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 72fde2888ad2..93da9f783bec 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -836,7 +836,7 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
EXPORT_SYMBOL(inet_sendpage);
INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *,
- size_t, int, int, int *));
+ size_t, int, int *));
int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags)
{
@@ -848,8 +848,7 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
sock_rps_record_flow(sk);
err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg,
- sk, msg, size, flags & MSG_DONTWAIT,
- flags & ~MSG_DONTWAIT, &addr_len);
+ sk, msg, size, flags, &addr_len);
if (err >= 0)
msg->msg_namelen = addr_len;
return err;
@@ -1234,9 +1233,9 @@ static int inet_sk_reselect_saddr(struct sock *sk)
/* Query new route. */
fl4 = &inet->cork.fl.u.ip4;
- rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk),
- sk->sk_bound_dev_if, sk->sk_protocol,
- inet->inet_sport, inet->inet_dport, sk);
+ rt = ip_route_connect(fl4, daddr, 0, sk->sk_bound_dev_if,
+ sk->sk_protocol, inet->inet_sport,
+ inet->inet_dport, sk);
if (IS_ERR(rt))
return PTR_ERR(rt);
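The af_inet.c hunk above (and the matching ping, raw, tcp and tcp_bpf hunks later in this diff) drops the separate "nonblock" argument from the protocol recvmsg() handlers: the value was always derived from MSG_DONTWAIT, so the callee can recover it from flags itself. A minimal standalone C sketch of the pattern, with illustrative names rather than the kernel's:

/* Standalone sketch, not kernel code: the old signature carried a redundant
 * nonblock argument; the new one derives it from MSG_DONTWAIT in flags.
 */
#include <stdbool.h>
#include <stdio.h>

#define MSG_DONTWAIT 0x40	/* matches the usual Linux UAPI value */

/* old: recvmsg(..., int nonblock, int flags, ...) */
static long rcvtimeo(bool noblock)
{
	return noblock ? 0 : 5000;	/* 0 = do not wait, else default timeout */
}

/* new: recvmsg(..., int flags, ...) recovers non-blocking mode from flags */
static long rcvtimeo_from_flags(int flags)
{
	return rcvtimeo(flags & MSG_DONTWAIT);
}

int main(void)
{
	printf("blocking timeo=%ld\n", rcvtimeo_from_flags(0));
	printf("nonblocking timeo=%ld\n", rcvtimeo_from_flags(MSG_DONTWAIT));
	return 0;
}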
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 2d0c05ca9c6f..ab4a5601c82a 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1304,9 +1304,9 @@ static struct packet_type arp_packet_type __read_mostly = {
.func = arp_rcv,
};
+#ifdef CONFIG_PROC_FS
#if IS_ENABLED(CONFIG_AX25)
-/* ------------------------------------------------------------------------ */
/*
* ax25 -> ASCII conversion
*/
@@ -1412,16 +1412,13 @@ static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
return neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_SKIP_NOARP);
}
-/* ------------------------------------------------------------------------ */
-
static const struct seq_operations arp_seq_ops = {
.start = arp_seq_start,
.next = neigh_seq_next,
.stop = neigh_seq_stop,
.show = arp_seq_show,
};
-
-/* ------------------------------------------------------------------------ */
+#endif /* CONFIG_PROC_FS */
static int __net_init arp_net_init(struct net *net)
{
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 48f337ccf949..ffd57523331f 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -44,10 +44,9 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
saddr = inet->mc_addr;
}
fl4 = &inet->cork.fl.u.ip4;
- rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr,
- RT_CONN_FLAGS(sk), oif,
- sk->sk_protocol,
- inet->inet_sport, usin->sin_port, sk);
+ rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, oif,
+ sk->sk_protocol, inet->inet_sport,
+ usin->sin_port, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
if (err == -ENETUNREACH)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 53a6b14dc50a..89141ba5e9ee 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2573,7 +2573,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
struct devinet_sysctl_table *t;
char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
- t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
+ t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL_ACCOUNT);
if (!t)
goto out;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index d747166bb291..b21238df3301 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -705,7 +705,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
static inline int esp_remove_trailer(struct sk_buff *skb)
{
struct xfrm_state *x = xfrm_input_state(skb);
- struct xfrm_offload *xo = xfrm_offload(skb);
struct crypto_aead *aead = x->data;
int alen, hlen, elen;
int padlen, trimlen;
@@ -717,11 +716,6 @@ static inline int esp_remove_trailer(struct sk_buff *skb)
hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);
elen = skb->len - hlen;
- if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) {
- ret = xo->proto;
- goto out;
- }
-
if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
BUG();
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index af8209f912ab..f361d3d56be2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1384,7 +1384,7 @@ static void nl_fib_input(struct sk_buff *skb)
return;
nlh = nlmsg_hdr(skb);
- frn = (struct fib_result_nl *) nlmsg_data(nlh);
+ frn = nlmsg_data(nlh);
nl_fib_lookup(net, frn);
portid = NETLINK_CB(skb).portid; /* netlink portid */
@@ -1425,7 +1425,7 @@ static void fib_disable_ip(struct net_device *dev, unsigned long event,
static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
- struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
+ struct in_ifaddr *ifa = ptr;
struct net_device *dev = ifa->ifa_dev->dev;
struct net *net = dev_net(dev);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 001fea394bde..513f475c6a53 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -145,7 +145,7 @@ INDIRECT_CALLABLE_SCOPE bool fib4_rule_suppress(struct fib_rule *rule,
int flags,
struct fib_lookup_arg *arg)
{
- struct fib_result *result = (struct fib_result *) arg->result;
+ struct fib_result *result = arg->result;
struct net_device *dev = NULL;
if (result->fi) {
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ccb62038f6a4..a57ba23571c9 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -524,7 +524,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
fri.tb_id = tb_id;
fri.dst = key;
fri.dst_len = dst_len;
- fri.tos = inet_dscp_to_dsfield(fa->fa_dscp);
+ fri.dscp = fa->fa_dscp;
fri.type = fa->fa_type;
fri.offload = READ_ONCE(fa->offload);
fri.trap = READ_ONCE(fa->trap);
@@ -1781,7 +1781,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
rtm->rtm_family = AF_INET;
rtm->rtm_dst_len = fri->dst_len;
rtm->rtm_src_len = 0;
- rtm->rtm_tos = fri->tos;
+ rtm->rtm_tos = inet_dscp_to_dsfield(fri->dscp);
if (tb_id < 256)
rtm->rtm_table = tb_id;
else
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index fb0e49c36c2e..2734c3af7e24 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -82,7 +82,7 @@ static int call_fib_entry_notifier(struct notifier_block *nb,
.dst = dst,
.dst_len = dst_len,
.fi = fa->fa_info,
- .tos = inet_dscp_to_dsfield(fa->fa_dscp),
+ .dscp = fa->fa_dscp,
.type = fa->fa_type,
.tb_id = fa->tb_id,
};
@@ -99,7 +99,7 @@ static int call_fib_entry_notifiers(struct net *net,
.dst = dst,
.dst_len = dst_len,
.fi = fa->fa_info,
- .tos = inet_dscp_to_dsfield(fa->fa_dscp),
+ .dscp = fa->fa_dscp,
.type = fa->fa_type,
.tb_id = fa->tb_id,
};
@@ -1032,8 +1032,8 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
if (fa->fa_slen == slen && fa->tb_id == fri->tb_id &&
- fa->fa_dscp == inet_dsfield_to_dscp(fri->tos) &&
- fa->fa_info == fri->fi && fa->fa_type == fri->type)
+ fa->fa_dscp == fri->dscp && fa->fa_info == fri->fi &&
+ fa->fa_type == fri->type)
return fa;
}
@@ -2305,7 +2305,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
fri.tb_id = tb->tb_id;
fri.dst = xkey;
fri.dst_len = KEYLENGTH - fa->fa_slen;
- fri.tos = inet_dscp_to_dsfield(fa->fa_dscp);
+ fri.dscp = fa->fa_dscp;
fri.type = fa->fa_type;
fri.offload = READ_ONCE(fa->offload);
fri.trap = READ_ONCE(fa->trap);
@@ -2625,7 +2625,7 @@ static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
static int fib_triestat_seq_show(struct seq_file *seq, void *v)
{
- struct net *net = (struct net *)seq->private;
+ struct net *net = seq->private;
unsigned int h;
seq_printf(seq,
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 0d085cc8d96c..025a33c1b04d 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -16,7 +16,6 @@
#include <net/protocol.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
-#include <net/xfrm.h>
#include <uapi/linux/fou.h>
#include <uapi/linux/genetlink.h>
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 72a375c7f417..efea0e796f06 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -186,7 +186,7 @@ EXPORT_SYMBOL(icmp_err_convert);
*/
struct icmp_control {
- bool (*handler)(struct sk_buff *skb);
+ enum skb_drop_reason (*handler)(struct sk_buff *skb);
short error; /* This ICMP is classed as an error message */
};
@@ -342,7 +342,7 @@ void icmp_out_count(struct net *net, unsigned char type)
static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,
struct sk_buff *skb)
{
- struct icmp_bxm *icmp_param = (struct icmp_bxm *)from;
+ struct icmp_bxm *icmp_param = from;
__wsum csum;
csum = skb_copy_and_csum_bits(icmp_param->skb,
@@ -839,8 +839,9 @@ static bool icmp_tag_validation(int proto)
* ICMP_PARAMETERPROB.
*/
-static bool icmp_unreach(struct sk_buff *skb)
+static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
{
+ enum skb_drop_reason reason = SKB_NOT_DROPPED_YET;
const struct iphdr *iph;
struct icmphdr *icmph;
struct net *net;
@@ -860,8 +861,10 @@ static bool icmp_unreach(struct sk_buff *skb)
icmph = icmp_hdr(skb);
iph = (const struct iphdr *)skb->data;
- if (iph->ihl < 5) /* Mangled header, drop. */
+ if (iph->ihl < 5) { /* Mangled header, drop. */
+ reason = SKB_DROP_REASON_IP_INHDR;
goto out_err;
+ }
switch (icmph->type) {
case ICMP_DEST_UNREACH:
@@ -941,10 +944,10 @@ static bool icmp_unreach(struct sk_buff *skb)
icmp_socket_deliver(skb, info);
out:
- return true;
+ return reason;
out_err:
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
- return false;
+ return reason ?: SKB_DROP_REASON_NOT_SPECIFIED;
}
@@ -952,20 +955,20 @@ out_err:
* Handle ICMP_REDIRECT.
*/
-static bool icmp_redirect(struct sk_buff *skb)
+static enum skb_drop_reason icmp_redirect(struct sk_buff *skb)
{
if (skb->len < sizeof(struct iphdr)) {
__ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
- return false;
+ return SKB_DROP_REASON_PKT_TOO_SMALL;
}
if (!pskb_may_pull(skb, sizeof(struct iphdr))) {
/* there aught to be a stat */
- return false;
+ return SKB_DROP_REASON_NOMEM;
}
icmp_socket_deliver(skb, ntohl(icmp_hdr(skb)->un.gateway));
- return true;
+ return SKB_NOT_DROPPED_YET;
}
/*
@@ -982,7 +985,7 @@ static bool icmp_redirect(struct sk_buff *skb)
* See also WRT handling of options once they are done and working.
*/
-static bool icmp_echo(struct sk_buff *skb)
+static enum skb_drop_reason icmp_echo(struct sk_buff *skb)
{
struct icmp_bxm icmp_param;
struct net *net;
@@ -990,7 +993,7 @@ static bool icmp_echo(struct sk_buff *skb)
net = dev_net(skb_dst(skb)->dev);
/* should there be an ICMP stat for ignored echos? */
if (net->ipv4.sysctl_icmp_echo_ignore_all)
- return true;
+ return SKB_NOT_DROPPED_YET;
icmp_param.data.icmph = *icmp_hdr(skb);
icmp_param.skb = skb;
@@ -1001,10 +1004,10 @@ static bool icmp_echo(struct sk_buff *skb)
if (icmp_param.data.icmph.type == ICMP_ECHO)
icmp_param.data.icmph.type = ICMP_ECHOREPLY;
else if (!icmp_build_probe(skb, &icmp_param.data.icmph))
- return true;
+ return SKB_NOT_DROPPED_YET;
icmp_reply(&icmp_param, skb);
- return true;
+ return SKB_NOT_DROPPED_YET;
}
/* Helper for icmp_echo and icmpv6_echo_reply.
@@ -1122,7 +1125,7 @@ EXPORT_SYMBOL_GPL(icmp_build_probe);
* MUST be accurate to a few minutes.
* MUST be updated at least at 15Hz.
*/
-static bool icmp_timestamp(struct sk_buff *skb)
+static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb)
{
struct icmp_bxm icmp_param;
/*
@@ -1147,17 +1150,17 @@ static bool icmp_timestamp(struct sk_buff *skb)
icmp_param.data_len = 0;
icmp_param.head_len = sizeof(struct icmphdr) + 12;
icmp_reply(&icmp_param, skb);
- return true;
+ return SKB_NOT_DROPPED_YET;
out_err:
__ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
- return false;
+ return SKB_DROP_REASON_PKT_TOO_SMALL;
}
-static bool icmp_discard(struct sk_buff *skb)
+static enum skb_drop_reason icmp_discard(struct sk_buff *skb)
{
/* pretend it was a success */
- return true;
+ return SKB_NOT_DROPPED_YET;
}
/*
@@ -1165,18 +1168,20 @@ static bool icmp_discard(struct sk_buff *skb)
*/
int icmp_rcv(struct sk_buff *skb)
{
- struct icmphdr *icmph;
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct rtable *rt = skb_rtable(skb);
struct net *net = dev_net(rt->dst.dev);
- bool success;
+ struct icmphdr *icmph;
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
struct sec_path *sp = skb_sec_path(skb);
int nh;
if (!(sp && sp->xvec[sp->len - 1]->props.flags &
- XFRM_STATE_ICMP))
+ XFRM_STATE_ICMP)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop;
+ }
if (!pskb_may_pull(skb, sizeof(*icmph) + sizeof(struct iphdr)))
goto drop;
@@ -1184,8 +1189,11 @@ int icmp_rcv(struct sk_buff *skb)
nh = skb_network_offset(skb);
skb_set_network_header(skb, sizeof(*icmph));
- if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN, skb))
+ if (!xfrm4_policy_check_reverse(NULL, XFRM_POLICY_IN,
+ skb)) {
+ reason = SKB_DROP_REASON_XFRM_POLICY;
goto drop;
+ }
skb_set_network_header(skb, nh);
}
@@ -1207,13 +1215,13 @@ int icmp_rcv(struct sk_buff *skb)
/* We can't use icmp_pointers[].handler() because it is an array of
* size NR_ICMP_TYPES + 1 (19 elements) and PROBE has code 42.
*/
- success = icmp_echo(skb);
- goto success_check;
+ reason = icmp_echo(skb);
+ goto reason_check;
}
if (icmph->type == ICMP_EXT_ECHOREPLY) {
- success = ping_rcv(skb);
- goto success_check;
+ reason = ping_rcv(skb);
+ goto reason_check;
}
/*
@@ -1222,8 +1230,10 @@ int icmp_rcv(struct sk_buff *skb)
* RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
* discarded.
*/
- if (icmph->type > NR_ICMP_TYPES)
+ if (icmph->type > NR_ICMP_TYPES) {
+ reason = SKB_DROP_REASON_UNHANDLED_PROTO;
goto error;
+ }
/*
* Parse the ICMP message
@@ -1239,27 +1249,30 @@ int icmp_rcv(struct sk_buff *skb)
if ((icmph->type == ICMP_ECHO ||
icmph->type == ICMP_TIMESTAMP) &&
net->ipv4.sysctl_icmp_echo_ignore_broadcasts) {
+ reason = SKB_DROP_REASON_INVALID_PROTO;
goto error;
}
if (icmph->type != ICMP_ECHO &&
icmph->type != ICMP_TIMESTAMP &&
icmph->type != ICMP_ADDRESS &&
icmph->type != ICMP_ADDRESSREPLY) {
+ reason = SKB_DROP_REASON_INVALID_PROTO;
goto error;
}
}
- success = icmp_pointers[icmph->type].handler(skb);
-success_check:
- if (success) {
+ reason = icmp_pointers[icmph->type].handler(skb);
+reason_check:
+ if (!reason) {
consume_skb(skb);
return NET_RX_SUCCESS;
}
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
csum_error:
+ reason = SKB_DROP_REASON_ICMP_CSUM;
__ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS);
error:
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
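The icmp.c conversion changes each per-type handler from returning bool to returning enum skb_drop_reason, where SKB_NOT_DROPPED_YET (value 0) means the packet was consumed and any non-zero value names the drop cause; icmp_rcv() then feeds that value to kfree_skb_reason(). A standalone sketch of the same conversion, with illustrative names rather than the kernel's:

/* Standalone sketch, not kernel code: converting a bool-returning handler to
 * a drop-reason-returning one while keeping "0 means success" semantics.
 */
#include <stdio.h>

enum drop_reason {
	NOT_DROPPED_YET = 0,	/* success: caller consumes the packet */
	DROP_PKT_TOO_SMALL,
	DROP_NOT_SPECIFIED,
};

/* Old style: success/failure only, the reason for a drop is lost. */
static int handle_old(int len)
{
	return len >= 8;	/* nonzero = handled */
}

/* New style: 0 still means success, so `if (!reason)` keeps working, but a
 * failure now carries a specific reason the caller can report.
 */
static enum drop_reason handle_new(int len)
{
	if (len < 8)
		return DROP_PKT_TOO_SMALL;
	return NOT_DROPPED_YET;
}

int main(void)
{
	enum drop_reason reason = handle_new(4);

	if (!reason)
		printf("consumed\n");
	else
		printf("dropped, reason=%d\n", reason);
	printf("legacy ok=%d\n", handle_old(4));	/* old API: ok == (reason == 0) */
	return 0;
}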
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 1d9e6d5e9a76..b65d074d9620 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2839,7 +2839,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
seq_puts(seq,
"Idx\tDevice : Count Querier\tGroup Users Timer\tReporter\n");
else {
- struct ip_mc_list *im = (struct ip_mc_list *)v;
+ struct ip_mc_list *im = v;
struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
char *querier;
long delta;
@@ -2983,7 +2983,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v)
static int igmp_mcf_seq_show(struct seq_file *seq, void *v)
{
- struct ip_sf_list *psf = (struct ip_sf_list *)v;
+ struct ip_sf_list *psf = v;
struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
if (v == SEQ_START_TOKEN) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1e5b53c2bb26..53f5f956d948 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -155,10 +155,14 @@ static int inet_csk_bind_conflict(const struct sock *sk,
*/
sk_for_each_bound(sk2, &tb->owners) {
- if (sk != sk2 &&
- (!sk->sk_bound_dev_if ||
- !sk2->sk_bound_dev_if ||
- sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
+ int bound_dev_if2;
+
+ if (sk == sk2)
+ continue;
+ bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
+ if ((!sk->sk_bound_dev_if ||
+ !bound_dev_if2 ||
+ sk->sk_bound_dev_if == bound_dev_if2)) {
if (reuse && sk2->sk_reuse &&
sk2->sk_state != TCP_LISTEN) {
if ((!relax ||
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 581b5b2d72a5..b812eb36f0e3 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -1028,12 +1028,13 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport)
goto skip_listen_ht;
- for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
+ for (i = s_i; i <= hashinfo->lhash2_mask; i++) {
struct inet_listen_hashbucket *ilb;
struct hlist_nulls_node *node;
num = 0;
- ilb = &hashinfo->listening_hash[i];
+ ilb = &hashinfo->lhash2[i];
+
spin_lock(&ilb->lock);
sk_nulls_for_each(sk, node, &ilb->nulls_head) {
struct inet_sock *inet = inet_sk(sk);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 63948f6aeca0..c9f9ac5013a7 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -510,7 +510,7 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare);
void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
void *reasm_data, bool try_coalesce)
{
- struct sk_buff **nextp = (struct sk_buff **)reasm_data;
+ struct sk_buff **nextp = reasm_data;
struct rb_node *rbn;
struct sk_buff *fp;
int sum_truesize;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index a5d57fa679ca..87354e20009a 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -193,42 +193,6 @@ inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk)
return inet_lhash2_bucket(h, hash);
}
-static void inet_hash2(struct inet_hashinfo *h, struct sock *sk)
-{
- struct inet_listen_hashbucket *ilb2;
-
- if (!h->lhash2)
- return;
-
- ilb2 = inet_lhash2_bucket_sk(h, sk);
-
- spin_lock(&ilb2->lock);
- if (sk->sk_reuseport && sk->sk_family == AF_INET6)
- hlist_add_tail_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
- &ilb2->head);
- else
- hlist_add_head_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
- &ilb2->head);
- ilb2->count++;
- spin_unlock(&ilb2->lock);
-}
-
-static void inet_unhash2(struct inet_hashinfo *h, struct sock *sk)
-{
- struct inet_listen_hashbucket *ilb2;
-
- if (!h->lhash2 ||
- WARN_ON_ONCE(hlist_unhashed(&inet_csk(sk)->icsk_listen_portaddr_node)))
- return;
-
- ilb2 = inet_lhash2_bucket_sk(h, sk);
-
- spin_lock(&ilb2->lock);
- hlist_del_init_rcu(&inet_csk(sk)->icsk_listen_portaddr_node);
- ilb2->count--;
- spin_unlock(&ilb2->lock);
-}
-
static inline int compute_score(struct sock *sk, struct net *net,
const unsigned short hnum, const __be32 daddr,
const int dif, const int sdif)
@@ -282,12 +246,11 @@ static struct sock *inet_lhash2_lookup(struct net *net,
const __be32 daddr, const unsigned short hnum,
const int dif, const int sdif)
{
- struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
+ struct hlist_nulls_node *node;
int score, hiscore = 0;
- inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
- sk = (struct sock *)icsk;
+ sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif);
if (score > hiscore) {
result = lookup_reuseport(net, sk, skb, doff,
@@ -410,13 +373,11 @@ begin:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
if (sk->sk_hash != hash)
continue;
- if (likely(INET_MATCH(sk, net, acookie,
- saddr, daddr, ports, dif, sdif))) {
+ if (likely(inet_match(net, sk, acookie, ports, dif, sdif))) {
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
goto out;
- if (unlikely(!INET_MATCH(sk, net, acookie,
- saddr, daddr, ports,
- dif, sdif))) {
+ if (unlikely(!inet_match(net, sk, acookie,
+ ports, dif, sdif))) {
sock_gen_put(sk);
goto begin;
}
@@ -465,8 +426,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
if (sk2->sk_hash != hash)
continue;
- if (likely(INET_MATCH(sk2, net, acookie,
- saddr, daddr, ports, dif, sdif))) {
+ if (likely(inet_match(net, sk2, acookie, ports, dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
if (twsk_unique(sk, sk2, twp))
@@ -532,16 +492,14 @@ static bool inet_ehash_lookup_by_sk(struct sock *sk,
if (esk->sk_hash != sk->sk_hash)
continue;
if (sk->sk_family == AF_INET) {
- if (unlikely(INET_MATCH(esk, net, acookie,
- sk->sk_daddr,
- sk->sk_rcv_saddr,
+ if (unlikely(inet_match(net, esk, acookie,
ports, dif, sdif))) {
return true;
}
}
#if IS_ENABLED(CONFIG_IPV6)
else if (sk->sk_family == AF_INET6) {
- if (unlikely(INET6_MATCH(esk, net,
+ if (unlikely(inet6_match(net, esk,
&sk->sk_v6_daddr,
&sk->sk_v6_rcv_saddr,
ports, dif, sdif))) {
@@ -633,7 +591,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
int __inet_hash(struct sock *sk, struct sock *osk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
- struct inet_listen_hashbucket *ilb;
+ struct inet_listen_hashbucket *ilb2;
int err = 0;
if (sk->sk_state != TCP_LISTEN) {
@@ -643,25 +601,23 @@ int __inet_hash(struct sock *sk, struct sock *osk)
return 0;
}
WARN_ON(!sk_unhashed(sk));
- ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+ ilb2 = inet_lhash2_bucket_sk(hashinfo, sk);
- spin_lock(&ilb->lock);
+ spin_lock(&ilb2->lock);
if (sk->sk_reuseport) {
- err = inet_reuseport_add_sock(sk, ilb);
+ err = inet_reuseport_add_sock(sk, ilb2);
if (err)
goto unlock;
}
if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
sk->sk_family == AF_INET6)
- __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head);
+ __sk_nulls_add_node_tail_rcu(sk, &ilb2->nulls_head);
else
- __sk_nulls_add_node_rcu(sk, &ilb->nulls_head);
- inet_hash2(hashinfo, sk);
- ilb->count++;
+ __sk_nulls_add_node_rcu(sk, &ilb2->nulls_head);
sock_set_flag(sk, SOCK_RCU_FREE);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
unlock:
- spin_unlock(&ilb->lock);
+ spin_unlock(&ilb2->lock);
return err;
}
@@ -678,23 +634,6 @@ int inet_hash(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet_hash);
-static void __inet_unhash(struct sock *sk, struct inet_listen_hashbucket *ilb)
-{
- if (sk_unhashed(sk))
- return;
-
- if (rcu_access_pointer(sk->sk_reuseport_cb))
- reuseport_stop_listen_sock(sk);
- if (ilb) {
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-
- inet_unhash2(hashinfo, sk);
- ilb->count--;
- }
- __sk_nulls_del_node_init_rcu(sk);
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-}
-
void inet_unhash(struct sock *sk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
@@ -703,20 +642,34 @@ void inet_unhash(struct sock *sk)
return;
if (sk->sk_state == TCP_LISTEN) {
- struct inet_listen_hashbucket *ilb;
+ struct inet_listen_hashbucket *ilb2;
- ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+ ilb2 = inet_lhash2_bucket_sk(hashinfo, sk);
/* Don't disable bottom halves while acquiring the lock to
* avoid circular locking dependency on PREEMPT_RT.
*/
- spin_lock(&ilb->lock);
- __inet_unhash(sk, ilb);
- spin_unlock(&ilb->lock);
+ spin_lock(&ilb2->lock);
+ if (sk_unhashed(sk)) {
+ spin_unlock(&ilb2->lock);
+ return;
+ }
+
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ reuseport_stop_listen_sock(sk);
+
+ __sk_nulls_del_node_init_rcu(sk);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ spin_unlock(&ilb2->lock);
} else {
spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
spin_lock_bh(lock);
- __inet_unhash(sk, NULL);
+ if (sk_unhashed(sk)) {
+ spin_unlock_bh(lock);
+ return;
+ }
+ __sk_nulls_del_node_init_rcu(sk);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
spin_unlock_bh(lock);
}
}
@@ -874,29 +827,14 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
}
EXPORT_SYMBOL_GPL(inet_hash_connect);
-void inet_hashinfo_init(struct inet_hashinfo *h)
-{
- int i;
-
- for (i = 0; i < INET_LHTABLE_SIZE; i++) {
- spin_lock_init(&h->listening_hash[i].lock);
- INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head,
- i + LISTENING_NULLS_BASE);
- h->listening_hash[i].count = 0;
- }
-
- h->lhash2 = NULL;
-}
-EXPORT_SYMBOL_GPL(inet_hashinfo_init);
-
static void init_hashinfo_lhash2(struct inet_hashinfo *h)
{
int i;
for (i = 0; i <= h->lhash2_mask; i++) {
spin_lock_init(&h->lhash2[i].lock);
- INIT_HLIST_HEAD(&h->lhash2[i].head);
- h->lhash2[i].count = 0;
+ INIT_HLIST_NULLS_HEAD(&h->lhash2[i].nulls_head,
+ i + LISTENING_NULLS_BASE);
}
}
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 92ba3350274b..e3aa436a1bdf 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -90,6 +90,7 @@ int ip_forward(struct sk_buff *skb)
struct rtable *rt; /* Route we use */
struct ip_options *opt = &(IPCB(skb)->opt);
struct net *net;
+ SKB_DR(reason);
/* that should never happen */
if (skb->pkt_type != PACKET_HOST)
@@ -101,8 +102,10 @@ int ip_forward(struct sk_buff *skb)
if (skb_warn_if_lro(skb))
goto drop;
- if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb))
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
+ SKB_DR_SET(reason, XFRM_POLICY);
goto drop;
+ }
if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb))
return NET_RX_SUCCESS;
@@ -118,8 +121,10 @@ int ip_forward(struct sk_buff *skb)
if (ip_hdr(skb)->ttl <= 1)
goto too_many_hops;
- if (!xfrm4_route_forward(skb))
+ if (!xfrm4_route_forward(skb)) {
+ SKB_DR_SET(reason, XFRM_POLICY);
goto drop;
+ }
rt = skb_rtable(skb);
@@ -132,6 +137,7 @@ int ip_forward(struct sk_buff *skb)
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
+ SKB_DR_SET(reason, PKT_TOO_BIG);
goto drop;
}
@@ -169,7 +175,8 @@ too_many_hops:
/* Tell the sender its packet died... */
__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
+ SKB_DR_SET(reason, IP_INHDR);
drop:
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}
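The ip_forward.c hunk uses the SKB_DR()/SKB_DR_SET() helpers to carry a drop reason in a local variable and consult it only at the single kfree_skb_reason() call. A standalone approximation of how such token-pasting macros can be built; the actual kernel definitions and enum values may differ:

/* Standalone sketch, not the kernel headers: approximate SKB_DR helpers. */
#include <stdio.h>

enum skb_drop_reason {
	SKB_DROP_REASON_NOT_SPECIFIED = 1,
	SKB_DROP_REASON_XFRM_POLICY,
	SKB_DROP_REASON_PKT_TOO_BIG,
	SKB_DROP_REASON_IP_INHDR,
};

#define SKB_DR_INIT(name, reason) \
	enum skb_drop_reason name = SKB_DROP_REASON_##reason
#define SKB_DR(name)		SKB_DR_INIT(name, NOT_SPECIFIED)
#define SKB_DR_SET(name, reason) ((name) = SKB_DROP_REASON_##reason)

int main(void)
{
	SKB_DR(reason);				/* defaults to NOT_SPECIFIED */

	SKB_DR_SET(reason, PKT_TOO_BIG);	/* token-pasted to the full enum name */
	printf("reason=%d\n", reason);
	return 0;
}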
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index aacee9dd771b..7e474a85deaf 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -748,6 +748,7 @@ free_skb:
static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ __be16 flags;
int len;
len = tunnel->tun_hlen;
@@ -763,19 +764,15 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
if (set_mtu)
dev->mtu = max_t(int, dev->mtu - len, 68);
- if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
- if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
- tunnel->encap.type == TUNNEL_ENCAP_NONE) {
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- } else {
- dev->features &= ~NETIF_F_GSO_SOFTWARE;
- dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
- }
- dev->features |= NETIF_F_LLTX;
- } else {
+ flags = tunnel->parms.o_flags;
+
+ if (flags & TUNNEL_SEQ ||
+ (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
+ dev->features &= ~NETIF_F_GSO_SOFTWARE;
dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
- dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
+ } else {
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
}
}
@@ -949,6 +946,7 @@ static void ipgre_tunnel_setup(struct net_device *dev)
static void __gre_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel;
+ __be16 flags;
tunnel = netdev_priv(dev);
tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
@@ -957,25 +955,21 @@ static void __gre_tunnel_init(struct net_device *dev)
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
- dev->features |= GRE_FEATURES;
+ dev->features |= GRE_FEATURES | NETIF_F_LLTX;
dev->hw_features |= GRE_FEATURES;
- if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
- /* TCP offload with GRE SEQ is not supported, nor
- * can we support 2 levels of outer headers requiring
- * an update.
- */
- if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
- (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- }
+ flags = tunnel->parms.o_flags;
- /* Can use a lockless transmit, unless we generate
- * output sequences
- */
- dev->features |= NETIF_F_LLTX;
- }
+ /* TCP offload with GRE SEQ is not supported, nor can we support 2
+ * levels of outer headers requiring an update.
+ */
+ if (flags & TUNNEL_SEQ)
+ return;
+ if (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)
+ return;
+
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
}
static int ipgre_tunnel_init(struct net_device *dev)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 95f7bb052784..b1165f717cd1 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -451,6 +451,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
* that it receives, do not try to analyse it.
*/
if (skb->pkt_type == PACKET_OTHERHOST) {
+ dev_core_stats_rx_otherhost_dropped_inc(skb->dev);
drop_reason = SKB_DROP_REASON_OTHERHOST;
goto drop;
}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c860519d57ee..13e6329784fb 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -356,7 +356,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
const void *ptr)
{
const struct mfc_cache_cmp_arg *cmparg = arg->key;
- struct mfc_cache *c = (struct mfc_cache *)ptr;
+ const struct mfc_cache *c = ptr;
return cmparg->mfc_mcastgrp != c->mfc_mcastgrp ||
cmparg->mfc_origin != c->mfc_origin;
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 4eed5afca392..918c61fda0f3 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -80,6 +80,7 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
struct iphdr *niph;
struct icmphdr *icmph;
unsigned int len;
+ int dataoff;
__wsum csum;
u8 proto;
@@ -99,10 +100,11 @@ struct sk_buff *nf_reject_skb_v4_unreach(struct net *net,
if (pskb_trim_rcsum(oldskb, ntohs(ip_hdr(oldskb)->tot_len)))
return NULL;
+ dataoff = ip_hdrlen(oldskb);
proto = ip_hdr(oldskb)->protocol;
if (!skb_csum_unnecessary(oldskb) &&
- nf_reject_verify_csum(proto) &&
+ nf_reject_verify_csum(oldskb, dataoff, proto) &&
nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto))
return NULL;
@@ -311,6 +313,7 @@ EXPORT_SYMBOL_GPL(nf_send_reset);
void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
{
struct iphdr *iph = ip_hdr(skb_in);
+ int dataoff = ip_hdrlen(skb_in);
u8 proto = iph->protocol;
if (iph->frag_off & htons(IP_OFFSET))
@@ -320,12 +323,13 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
nf_reject_fill_skb_dst(skb_in) < 0)
return;
- if (skb_csum_unnecessary(skb_in) || !nf_reject_verify_csum(proto)) {
+ if (skb_csum_unnecessary(skb_in) ||
+ !nf_reject_verify_csum(skb_in, dataoff, proto)) {
icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
return;
}
- if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0)
+ if (nf_ip_checksum(skb_in, hook, dataoff, proto) == 0)
icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
}
EXPORT_SYMBOL_GPL(nf_send_unreach);
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 4151eb1262dd..b75cac69bd7e 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -112,6 +112,10 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
fl4.daddr = iph->daddr;
fl4.saddr = get_saddr(iph->saddr);
} else {
+ if (nft_hook(pkt) == NF_INET_FORWARD &&
+ priv->flags & NFTA_FIB_F_IIF)
+ fl4.flowi4_iif = nft_out(pkt)->ifindex;
+
fl4.daddr = iph->saddr;
fl4.saddr = get_saddr(iph->daddr);
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index aa9a11b20d18..1a43ca73f94d 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -600,7 +600,7 @@ EXPORT_SYMBOL_GPL(ping_err);
int ping_getfrag(void *from, char *to,
int offset, int fraglen, int odd, struct sk_buff *skb)
{
- struct pingfakehdr *pfh = (struct pingfakehdr *)from;
+ struct pingfakehdr *pfh = from;
if (offset == 0) {
fraglen -= sizeof(struct icmphdr);
@@ -854,8 +854,8 @@ do_confirm:
goto out;
}
-int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len)
+int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len)
{
struct inet_sock *isk = inet_sk(sk);
int family = sk->sk_family;
@@ -871,7 +871,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
if (flags & MSG_ERRQUEUE)
return inet_recv_error(sk, msg, len, addr_len);
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
@@ -944,16 +944,24 @@ out:
}
EXPORT_SYMBOL_GPL(ping_recvmsg);
-int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static enum skb_drop_reason __ping_queue_rcv_skb(struct sock *sk,
+ struct sk_buff *skb)
{
+ enum skb_drop_reason reason;
+
pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
inet_sk(sk), inet_sk(sk)->inet_num, skb);
- if (sock_queue_rcv_skb(sk, skb) < 0) {
- kfree_skb(skb);
+ if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
+ kfree_skb_reason(skb, reason);
pr_debug("ping_queue_rcv_skb -> failed\n");
- return -1;
+ return reason;
}
- return 0;
+ return SKB_NOT_DROPPED_YET;
+}
+
+int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+ return __ping_queue_rcv_skb(sk, skb) ? -1 : 0;
}
EXPORT_SYMBOL_GPL(ping_queue_rcv_skb);
@@ -962,12 +970,12 @@ EXPORT_SYMBOL_GPL(ping_queue_rcv_skb);
* All we need to do is get the socket.
*/
-bool ping_rcv(struct sk_buff *skb)
+enum skb_drop_reason ping_rcv(struct sk_buff *skb)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NO_SOCKET;
struct sock *sk;
struct net *net = dev_net(skb->dev);
struct icmphdr *icmph = icmp_hdr(skb);
- bool rc = false;
/* We assume the packet has already been checked by icmp_rcv */
@@ -982,15 +990,17 @@ bool ping_rcv(struct sk_buff *skb)
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
pr_debug("rcv on socket %p\n", sk);
- if (skb2 && !ping_queue_rcv_skb(sk, skb2))
- rc = true;
+ if (skb2)
+ reason = __ping_queue_rcv_skb(sk, skb2);
+ else
+ reason = SKB_DROP_REASON_NOMEM;
sock_put(sk);
}
- if (!rc)
+ if (reason)
pr_debug("no socket, dropping\n");
- return rc;
+ return reason;
}
EXPORT_SYMBOL_GPL(ping_rcv);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 9f97b9cbf7b3..bbd717805b10 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -753,7 +753,7 @@ out:
*/
static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
size_t copied = 0;
@@ -769,7 +769,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
goto out;
}
- skb = skb_recv_datagram(sk, flags, noblock, &err);
+ skb = skb_recv_datagram(sk, flags, &err);
if (!skb)
goto out;
@@ -783,7 +783,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (err)
goto done;
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
/* Copy the address. */
if (sin) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ed01063d8f30..356f535f3443 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -503,28 +503,29 @@ static void ip_rt_fix_tos(struct flowi4 *fl4)
__u8 tos = RT_FL_TOS(fl4);
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
- fl4->flowi4_scope = tos & RTO_ONLINK ?
- RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
+ if (tos & RTO_ONLINK)
+ fl4->flowi4_scope = RT_SCOPE_LINK;
}
static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
- const struct sock *sk,
- const struct iphdr *iph,
- int oif, u8 tos,
- u8 prot, u32 mark, int flow_flags)
+ const struct sock *sk, const struct iphdr *iph,
+ int oif, __u8 tos, u8 prot, u32 mark,
+ int flow_flags)
{
+ __u8 scope = RT_SCOPE_UNIVERSE;
+
if (sk) {
const struct inet_sock *inet = inet_sk(sk);
oif = sk->sk_bound_dev_if;
mark = sk->sk_mark;
- tos = RT_CONN_FLAGS(sk);
+ tos = ip_sock_rt_tos(sk);
+ scope = ip_sock_rt_scope(sk);
prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
}
- flowi4_init_output(fl4, oif, mark, tos,
- RT_SCOPE_UNIVERSE, prot,
- flow_flags,
- iph->daddr, iph->saddr, 0, 0,
+
+ flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope,
+ prot, flow_flags, iph->daddr, iph->saddr, 0, 0,
sock_net_uid(net, sk));
}
@@ -534,9 +535,9 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
const struct net *net = dev_net(skb->dev);
const struct iphdr *iph = ip_hdr(skb);
int oif = skb->dev->ifindex;
- u8 tos = RT_TOS(iph->tos);
u8 prot = iph->protocol;
u32 mark = skb->mark;
+ __u8 tos = iph->tos;
__build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
}
@@ -552,7 +553,8 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
- RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+ ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
+ ip_sock_rt_scope(sk),
inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
inet_sk_flowi_flags(sk),
daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
@@ -825,14 +827,13 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
const struct iphdr *iph = (const struct iphdr *) skb->data;
struct net *net = dev_net(skb->dev);
int oif = skb->dev->ifindex;
- u8 tos = RT_TOS(iph->tos);
u8 prot = iph->protocol;
u32 mark = skb->mark;
+ __u8 tos = iph->tos;
rt = (struct rtable *) dst;
__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
- ip_rt_fix_tos(&fl4);
__ip_do_redirect(rt, skb, &fl4, true);
}
@@ -945,6 +946,7 @@ static int ip_error(struct sk_buff *skb)
struct inet_peer *peer;
unsigned long now;
struct net *net;
+ SKB_DR(reason);
bool send;
int code;
@@ -964,10 +966,12 @@ static int ip_error(struct sk_buff *skb)
if (!IN_DEV_FORWARD(in_dev)) {
switch (rt->dst.error) {
case EHOSTUNREACH:
+ SKB_DR_SET(reason, IP_INADDRERRORS);
__IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
break;
case ENETUNREACH:
+ SKB_DR_SET(reason, IP_INNOROUTES);
__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
break;
}
@@ -983,6 +987,7 @@ static int ip_error(struct sk_buff *skb)
break;
case ENETUNREACH:
code = ICMP_NET_UNREACH;
+ SKB_DR_SET(reason, IP_INNOROUTES);
__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
break;
case EACCES:
@@ -1009,7 +1014,7 @@ static int ip_error(struct sk_buff *skb)
if (send)
icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
-out: kfree_skb(skb);
+out: kfree_skb_reason(skb, reason);
return 0;
}
@@ -1057,7 +1062,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct flowi4 fl4;
ip_rt_build_flow_key(&fl4, sk, skb);
- ip_rt_fix_tos(&fl4);
/* Don't make lookup fail for bridged encapsulations */
if (skb && netif_is_any_bridge_port(skb->dev))
@@ -1074,8 +1078,8 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
struct rtable *rt;
u32 mark = IP4_REPLY_MARK(net, skb->mark);
- __build_flow_key(net, &fl4, NULL, iph, oif,
- RT_TOS(iph->tos), protocol, mark, 0);
+ __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, mark,
+ 0);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
__ip_rt_update_pmtu(rt, &fl4, mtu);
@@ -1132,8 +1136,6 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
goto out;
new = true;
- } else {
- ip_rt_fix_tos(&fl4);
}
__ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
@@ -1165,8 +1167,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net,
struct flowi4 fl4;
struct rtable *rt;
- __build_flow_key(net, &fl4, NULL, iph, oif,
- RT_TOS(iph->tos), protocol, 0, 0);
+ __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, 0, 0);
rt = __ip_route_output_key(net, &fl4);
if (!IS_ERR(rt)) {
__ip_do_redirect(rt, skb, &fl4, false);
@@ -3408,7 +3409,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fri.tb_id = table_id;
fri.dst = res.prefix;
fri.dst_len = res.prefixlen;
- fri.tos = fl4.flowi4_tos;
+ fri.dscp = inet_dsfield_to_dscp(fl4.flowi4_tos);
fri.type = rt->rt_type;
fri.offload = 0;
fri.trap = 0;
@@ -3421,7 +3422,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (fa->fa_slen == slen &&
fa->tb_id == fri.tb_id &&
- fa->fa_dscp == inet_dsfield_to_dscp(fri.tos) &&
+ fa->fa_dscp == fri.dscp &&
fa->fa_info == res.fi &&
fa->fa_type == fri.type) {
fri.offload = READ_ONCE(fa->offload);
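Several FIB hunks above (fib_semantics.c, fib_trie.c, route.c) replace the raw tos byte in struct fib_rt_info with the type-safe dscp value, converting at the netlink boundary via inet_dsfield_to_dscp()/inet_dscp_to_dsfield(). A standalone sketch of that conversion; the masked-dsfield representation used below is an assumption for illustration, not copied from the kernel:

/* Standalone sketch, not kernel code: a "dscp" value stored as the DS field
 * with the two ECN bits masked off (assumed representation).
 */
#include <stdint.h>
#include <stdio.h>

typedef uint8_t dscp_t;			/* distinct type for DSCP values */

#define INET_DSCP_MASK 0xfc		/* upper 6 bits of the DS field */

static inline dscp_t inet_dsfield_to_dscp(uint8_t dsfield)
{
	return (dscp_t)(dsfield & INET_DSCP_MASK);	/* drop ECN bits */
}

static inline uint8_t inet_dscp_to_dsfield(dscp_t dscp)
{
	return (uint8_t)dscp;
}

int main(void)
{
	uint8_t tos = (0x2e << 2) | 0x3;	/* EF DSCP with ECN bits set */
	dscp_t dscp = inet_dsfield_to_dscp(tos);

	printf("dsfield=0x%02x dscp(as dsfield)=0x%02x\n",
	       tos, inet_dscp_to_dsfield(dscp));
	return 0;
}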
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index ad80d180b60b..cd448cdd3b38 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -20,10 +20,6 @@
#include <net/protocol.h>
#include <net/netevent.h>
-static int two = 2;
-static int three __maybe_unused = 3;
-static int four = 4;
-static int thousand = 1000;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -1006,7 +1002,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "tcp_max_syn_backlog",
@@ -1059,7 +1055,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_fib_multipath_hash_policy,
.extra1 = SYSCTL_ZERO,
- .extra2 = &three,
+ .extra2 = SYSCTL_THREE,
},
{
.procname = "fib_multipath_hash_fields",
@@ -1117,7 +1113,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &four,
+ .extra2 = SYSCTL_FOUR,
},
{
.procname = "tcp_recovery",
@@ -1310,7 +1306,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &thousand,
+ .extra2 = SYSCTL_ONE_THOUSAND,
},
{
.procname = "tcp_pacing_ca_ratio",
@@ -1319,7 +1315,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &thousand,
+ .extra2 = SYSCTL_ONE_THOUSAND,
},
{
.procname = "tcp_wmem",
@@ -1391,7 +1387,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = SYSCTL_TWO,
},
{ }
};
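The sysctl_net_ipv4.c hunk drops the per-file "static int two = 2;" style bounds in favour of the shared SYSCTL_TWO/SYSCTL_THREE/SYSCTL_FOUR/SYSCTL_ONE_THOUSAND constants. That works because a ctl_table entry stores only pointers to its min/max values in extra1/extra2, so one read-only array can serve every table. A standalone sketch of the idea; the real sysctl_vals layout and struct ctl_table are only approximated here:

/* Standalone sketch, not kernel code: shared pointer constants for table bounds. */
#include <stdio.h>

static const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 1000 };

#define SYSCTL_ZERO		((void *)&sysctl_vals[0])
#define SYSCTL_TWO		((void *)&sysctl_vals[2])
#define SYSCTL_ONE_THOUSAND	((void *)&sysctl_vals[6])

struct ctl_table {
	const char *procname;
	void *extra1;	/* minimum */
	void *extra2;	/* maximum */
};

int main(void)
{
	struct ctl_table entry = {
		.procname = "tcp_pacing_ss_ratio",
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE_THOUSAND,
	};

	printf("%s: min=%d max=%d\n", entry.procname,
	       *(const int *)entry.extra1, *(const int *)entry.extra2);
	return 0;
}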
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cf18fbcbf123..028513d3e2a2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -429,7 +429,7 @@ void tcp_init_sock(struct sock *sk)
* algorithms that we must have the following bandaid to talk
* efficiently to them. -DaveM
*/
- tp->snd_cwnd = TCP_INIT_CWND;
+ tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
/* There's a bubble in the pipe until at least the first ACK. */
tp->app_limited = ~0U;
@@ -843,7 +843,6 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
}
release_sock(sk);
- sk_defer_free_flush(sk);
if (spliced)
return spliced;
@@ -1589,20 +1588,6 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
tcp_send_ack(sk);
}
-void __sk_defer_free_flush(struct sock *sk)
-{
- struct llist_node *head;
- struct sk_buff *skb, *n;
-
- head = llist_del_all(&sk->defer_list);
- llist_for_each_entry_safe(skb, n, head, ll_node) {
- prefetch(n);
- skb_mark_not_on_list(skb);
- __kfree_skb(skb);
- }
-}
-EXPORT_SYMBOL(__sk_defer_free_flush);
-
static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
{
__skb_unlink(skb, &sk->sk_receive_queue);
@@ -1610,11 +1595,7 @@ static void tcp_eat_recv_skb(struct sock *sk, struct sk_buff *skb)
sock_rfree(skb);
skb->destructor = NULL;
skb->sk = NULL;
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
- !llist_empty(&sk->defer_list)) {
- llist_add(&skb->ll_node, &sk->defer_list);
- return;
- }
+ return skb_attempt_defer_free(skb);
}
__kfree_skb(skb);
}
@@ -1877,8 +1858,7 @@ static void tcp_zerocopy_set_hint_for_skb(struct sock *sk,
}
static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags,
- struct scm_timestamping_internal *tss,
+ int flags, struct scm_timestamping_internal *tss,
int *cmsg_flags);
static int receive_fallback_to_copy(struct sock *sk,
struct tcp_zerocopy_receive *zc, int inq,
@@ -1900,7 +1880,7 @@ static int receive_fallback_to_copy(struct sock *sk,
if (err)
return err;
- err = tcp_recvmsg_locked(sk, &msg, inq, /*nonblock=*/1, /*flags=*/0,
+ err = tcp_recvmsg_locked(sk, &msg, inq, MSG_DONTWAIT,
tss, &zc->msg_flags);
if (err < 0)
return err;
@@ -2316,8 +2296,7 @@ static int tcp_inq_hint(struct sock *sk)
*/
static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags,
- struct scm_timestamping_internal *tss,
+ int flags, struct scm_timestamping_internal *tss,
int *cmsg_flags)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2335,9 +2314,11 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
if (sk->sk_state == TCP_LISTEN)
goto out;
- if (tp->recvmsg_inq)
+ if (tp->recvmsg_inq) {
*cmsg_flags = TCP_CMSG_INQ;
- timeo = sock_rcvtimeo(sk, nonblock);
+ msg->msg_get_inq = 1;
+ }
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
/* Urgent data needs to be handled specially. */
if (flags & MSG_OOB)
@@ -2455,7 +2436,6 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
__sk_flush_backlog(sk);
} else {
tcp_cleanup_rbuf(sk, copied);
- sk_defer_free_flush(sk);
sk_wait_data(sk, &timeo, last);
}
@@ -2556,10 +2536,10 @@ recv_sndq:
goto out;
}
-int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
- int flags, int *addr_len)
+int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len)
{
- int cmsg_flags = 0, ret, inq;
+ int cmsg_flags = 0, ret;
struct scm_timestamping_internal tss;
if (unlikely(flags & MSG_ERRQUEUE))
@@ -2568,20 +2548,20 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
if (sk_can_busy_loop(sk) &&
skb_queue_empty_lockless(&sk->sk_receive_queue) &&
sk->sk_state == TCP_ESTABLISHED)
- sk_busy_loop(sk, nonblock);
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
lock_sock(sk);
- ret = tcp_recvmsg_locked(sk, msg, len, nonblock, flags, &tss,
- &cmsg_flags);
+ ret = tcp_recvmsg_locked(sk, msg, len, flags, &tss, &cmsg_flags);
release_sock(sk);
- sk_defer_free_flush(sk);
- if (cmsg_flags && ret >= 0) {
+ if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) {
if (cmsg_flags & TCP_CMSG_TS)
tcp_recv_timestamp(msg, sk, &tss);
- if (cmsg_flags & TCP_CMSG_INQ) {
- inq = tcp_inq_hint(sk);
- put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
+ if (msg->msg_get_inq) {
+ msg->msg_inq = tcp_inq_hint(sk);
+ if (cmsg_flags & TCP_CMSG_INQ)
+ put_cmsg(msg, SOL_TCP, TCP_CM_INQ,
+ sizeof(msg->msg_inq), &msg->msg_inq);
}
}
return ret;
@@ -3033,7 +3013,7 @@ int tcp_disconnect(struct sock *sk, int flags)
icsk->icsk_rto_min = TCP_RTO_MIN;
icsk->icsk_delack_max = TCP_DELACK_MAX;
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
- tp->snd_cwnd = TCP_INIT_CWND;
+ tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
tp->snd_cwnd_cnt = 0;
tp->window_clamp = 0;
tp->delivered = 0;
@@ -3099,7 +3079,6 @@ int tcp_disconnect(struct sock *sk, int flags)
sk->sk_frag.page = NULL;
sk->sk_frag.offset = 0;
}
- sk_defer_free_flush(sk);
sk_error_report(sk);
return 0;
}
@@ -3744,7 +3723,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_max_pacing_rate = rate64;
info->tcpi_reordering = tp->reordering;
- info->tcpi_snd_cwnd = tp->snd_cwnd;
+ info->tcpi_snd_cwnd = tcp_snd_cwnd(tp);
if (info->tcpi_state == TCP_LISTEN) {
/* listeners aliased fields :
@@ -3915,7 +3894,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk,
rate64 = tcp_compute_delivery_rate(tp);
nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD);
- nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd);
+ nla_put_u32(stats, TCP_NLA_SND_CWND, tcp_snd_cwnd(tp));
nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering);
nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp));
@@ -4228,7 +4207,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname,
&zc, &len, err);
release_sock(sk);
- sk_defer_free_flush(sk);
if (len >= offsetofend(struct tcp_zerocopy_receive, msg_flags))
goto zerocopy_rcv_cmsg;
switch (len) {
@@ -4617,7 +4595,6 @@ void __init tcp_init(void)
timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE);
mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD);
- inet_hashinfo_init(&tcp_hashinfo);
inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash",
thash_entries, 21, /* one slot per 2 MB*/
0, 64 * 1024);
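The tcp.c hunks above, and the congestion-control modules that follow, stop reading and writing tp->snd_cwnd directly and go through tcp_snd_cwnd()/tcp_snd_cwnd_set(), giving one central place to sanity-check updates to the congestion window. A standalone sketch of the accessor pair; the helper bodies and the debug check are an illustrative guess, not the kernel's exact implementation:

/* Standalone sketch, not kernel code: read/write accessors around snd_cwnd. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct tcp_sock {
	uint32_t snd_cwnd;	/* accessed only through the helpers below */
};

static inline uint32_t tcp_snd_cwnd(const struct tcp_sock *tp)
{
	return tp->snd_cwnd;
}

static inline void tcp_snd_cwnd_set(struct tcp_sock *tp, uint32_t val)
{
	assert(val > 0);	/* central place to catch a bogus zero cwnd write */
	tp->snd_cwnd = val;
}

int main(void)
{
	struct tcp_sock tp = { .snd_cwnd = 10 };

	tcp_snd_cwnd_set(&tp, tcp_snd_cwnd(&tp) * 2);
	printf("cwnd=%u\n", tcp_snd_cwnd(&tp));
	return 0;
}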
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 02e8626ccb27..075e744bfb48 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -276,7 +276,7 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
} else { /* no RTT sample yet */
rtt_us = USEC_PER_MSEC; /* use nominal default RTT */
}
- bw = (u64)tp->snd_cwnd * BW_UNIT;
+ bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT;
do_div(bw, rtt_us);
sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
}
@@ -310,7 +310,7 @@ static u32 bbr_tso_segs_goal(struct sock *sk)
*/
bytes = min_t(unsigned long,
sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
- GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
+ GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER);
segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
return min(segs, 0x7FU);
@@ -323,9 +323,9 @@ static void bbr_save_cwnd(struct sock *sk)
struct bbr *bbr = inet_csk_ca(sk);
if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT)
- bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */
+ bbr->prior_cwnd = tcp_snd_cwnd(tp); /* this cwnd is good enough */
else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */
- bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd);
+ bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp));
}
static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
@@ -482,7 +482,7 @@ static bool bbr_set_cwnd_to_recover_or_restore(
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state;
- u32 cwnd = tp->snd_cwnd;
+ u32 cwnd = tcp_snd_cwnd(tp);
/* An ACK for P pkts should release at most 2*P packets. We do this
* in two steps. First, here we deduct the number of lost packets.
@@ -520,7 +520,7 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- u32 cwnd = tp->snd_cwnd, target_cwnd = 0;
+ u32 cwnd = tcp_snd_cwnd(tp), target_cwnd = 0;
if (!acked)
goto done; /* no packet fully ACKed; just apply caps */
@@ -544,9 +544,9 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
cwnd = max(cwnd, bbr_cwnd_min_target);
done:
- tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */
+ tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); /* apply global cap */
if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */
- tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), bbr_cwnd_min_target));
}
/* End cycle phase if it's time and/or we hit the phase's in-flight target. */
@@ -856,7 +856,7 @@ static void bbr_update_ack_aggregation(struct sock *sk,
bbr->ack_epoch_acked = min_t(u32, 0xFFFFF,
bbr->ack_epoch_acked + rs->acked_sacked);
extra_acked = bbr->ack_epoch_acked - expected_acked;
- extra_acked = min(extra_acked, tp->snd_cwnd);
+ extra_acked = min(extra_acked, tcp_snd_cwnd(tp));
if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx])
bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked;
}
@@ -914,7 +914,7 @@ static void bbr_check_probe_rtt_done(struct sock *sk)
return;
bbr->min_rtt_stamp = tcp_jiffies32; /* wait a while until PROBE_RTT */
- tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd);
+ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp), bbr->prior_cwnd));
bbr_reset_mode(sk);
}
@@ -1093,7 +1093,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */
bbr->full_bw_cnt = 0;
bbr_reset_lt_bw_sampling(sk);
- return tcp_sk(sk)->snd_cwnd;
+ return tcp_snd_cwnd(tcp_sk(sk));
}
/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index f5f588b1f6e9..58358bf92e1b 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -150,7 +150,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!acked)
return;
}
- bictcp_update(ca, tp->snd_cwnd);
+ bictcp_update(ca, tcp_snd_cwnd(tp));
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
@@ -166,16 +166,16 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk)
ca->epoch_start = 0; /* end of epoch */
/* Wmax and fast convergence */
- if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
- ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
+ if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence)
+ ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta))
/ (2 * BICTCP_BETA_SCALE);
else
- ca->last_max_cwnd = tp->snd_cwnd;
+ ca->last_max_cwnd = tcp_snd_cwnd(tp);
- if (tp->snd_cwnd <= low_window)
- return max(tp->snd_cwnd >> 1U, 2U);
+ if (tcp_snd_cwnd(tp) <= low_window)
+ return max(tcp_snd_cwnd(tp) >> 1U, 2U);
else
- return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
+ return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U);
}
static void bictcp_state(struct sock *sk, u8 new_state)
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 1cdcb4df0eb7..be3947e70fec 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -174,7 +174,6 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
static int tcp_bpf_recvmsg_parser(struct sock *sk,
struct msghdr *msg,
size_t len,
- int nonblock,
int flags,
int *addr_len)
{
@@ -186,7 +185,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
psock = sk_psock_get(sk);
if (unlikely(!psock))
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return tcp_recvmsg(sk, msg, len, flags, addr_len);
lock_sock(sk);
msg_bytes_ready:
@@ -211,7 +210,7 @@ msg_bytes_ready:
goto out;
}
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
if (!timeo) {
copied = -EAGAIN;
goto out;
@@ -234,7 +233,7 @@ out:
}
static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct sk_psock *psock;
int copied, ret;
@@ -244,11 +243,11 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
psock = sk_psock_get(sk);
if (unlikely(!psock))
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return tcp_recvmsg(sk, msg, len, flags, addr_len);
if (!skb_queue_empty(&sk->sk_receive_queue) &&
sk_psock_queue_empty(psock)) {
sk_psock_put(sk, psock);
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return tcp_recvmsg(sk, msg, len, flags, addr_len);
}
lock_sock(sk);
msg_bytes_ready:
@@ -257,14 +256,14 @@ msg_bytes_ready:
long timeo;
int data;
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
data = tcp_msg_wait_data(sk, psock, timeo);
if (data) {
if (!sk_psock_queue_empty(psock))
goto msg_bytes_ready;
release_sock(sk);
sk_psock_put(sk, psock);
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return tcp_recvmsg(sk, msg, len, flags, addr_len);
}
copied = -EAGAIN;
}
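The tcp_bpf hunks above show the other recurring conversion in this series: the separate nonblock argument to ->recvmsg() is gone, and non-blocking behaviour is derived solely from MSG_DONTWAIT in flags (timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT)). A hedged caller-side sketch, with a made-up helper name, of what requesting a non-blocking read now looks like:

	/* Illustrative only; example_recv_nowait is not a kernel function. */
	static int example_recv_nowait(struct sock *sk, struct msghdr *msg,
				       size_t len, int *addr_len)
	{
		/* Previously: ->recvmsg(sk, msg, len, 1 (nonblock), 0, addr_len) */
		return sk->sk_prot->recvmsg(sk, msg, len, MSG_DONTWAIT, addr_len);
	}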
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 709d23801823..ddc7ba0554bd 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -161,8 +161,8 @@ static void tcp_cdg_hystart_update(struct sock *sk)
LINUX_MIB_TCPHYSTARTTRAINDETECT);
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINCWND,
- tp->snd_cwnd);
- tp->snd_ssthresh = tp->snd_cwnd;
+ tcp_snd_cwnd(tp));
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
return;
}
}
@@ -180,8 +180,8 @@ static void tcp_cdg_hystart_update(struct sock *sk)
LINUX_MIB_TCPHYSTARTDELAYDETECT);
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTDELAYCWND,
- tp->snd_cwnd);
- tp->snd_ssthresh = tp->snd_cwnd;
+ tcp_snd_cwnd(tp));
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
}
}
}
@@ -252,7 +252,7 @@ static bool tcp_cdg_backoff(struct sock *sk, u32 grad)
return false;
}
- ca->shadow_wnd = max(ca->shadow_wnd, tp->snd_cwnd);
+ ca->shadow_wnd = max(ca->shadow_wnd, tcp_snd_cwnd(tp));
ca->state = CDG_BACKOFF;
tcp_enter_cwr(sk);
return true;
@@ -285,14 +285,14 @@ static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked)
}
if (!tcp_is_cwnd_limited(sk)) {
- ca->shadow_wnd = min(ca->shadow_wnd, tp->snd_cwnd);
+ ca->shadow_wnd = min(ca->shadow_wnd, tcp_snd_cwnd(tp));
return;
}
- prior_snd_cwnd = tp->snd_cwnd;
+ prior_snd_cwnd = tcp_snd_cwnd(tp);
tcp_reno_cong_avoid(sk, ack, acked);
- incr = tp->snd_cwnd - prior_snd_cwnd;
+ incr = tcp_snd_cwnd(tp) - prior_snd_cwnd;
ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr);
}
@@ -331,15 +331,15 @@ static u32 tcp_cdg_ssthresh(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (ca->state == CDG_BACKOFF)
- return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10);
+ return max(2U, (tcp_snd_cwnd(tp) * min(1024U, backoff_beta)) >> 10);
if (ca->state == CDG_NONFULL && use_tolerance)
- return tp->snd_cwnd;
+ return tcp_snd_cwnd(tp);
- ca->shadow_wnd = min(ca->shadow_wnd >> 1, tp->snd_cwnd);
+ ca->shadow_wnd = min(ca->shadow_wnd >> 1, tcp_snd_cwnd(tp));
if (use_shadow)
- return max3(2U, ca->shadow_wnd, tp->snd_cwnd >> 1);
- return max(2U, tp->snd_cwnd >> 1);
+ return max3(2U, ca->shadow_wnd, tcp_snd_cwnd(tp) >> 1);
+ return max(2U, tcp_snd_cwnd(tp) >> 1);
}
static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
@@ -357,7 +357,7 @@ static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev)
ca->gradients = gradients;
ca->rtt_seq = tp->snd_nxt;
- ca->shadow_wnd = tp->snd_cwnd;
+ ca->shadow_wnd = tcp_snd_cwnd(tp);
break;
case CA_EVENT_COMPLETE_CWR:
ca->state = CDG_UNKNOWN;
@@ -380,7 +380,7 @@ static void tcp_cdg_init(struct sock *sk)
ca->gradients = kcalloc(window, sizeof(ca->gradients[0]),
GFP_NOWAIT | __GFP_NOWARN);
ca->rtt_seq = tp->snd_nxt;
- ca->shadow_wnd = tp->snd_cwnd;
+ ca->shadow_wnd = tcp_snd_cwnd(tp);
}
static void tcp_cdg_release(struct sock *sk)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index dc95572163df..d3cae40749e8 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -16,6 +16,7 @@
#include <linux/gfp.h>
#include <linux/jhash.h>
#include <net/tcp.h>
+#include <trace/events/tcp.h>
static DEFINE_SPINLOCK(tcp_cong_list_lock);
static LIST_HEAD(tcp_cong_list);
@@ -33,6 +34,17 @@ struct tcp_congestion_ops *tcp_ca_find(const char *name)
return NULL;
}
+void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ trace_tcp_cong_state_set(sk, ca_state);
+
+ if (icsk->icsk_ca_ops->set_state)
+ icsk->icsk_ca_ops->set_state(sk, ca_state);
+ icsk->icsk_ca_state = ca_state;
+}
+
/* Must be called with rcu lock held */
static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net,
const char *name)
@@ -393,10 +405,10 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
*/
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
{
- u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
+ u32 cwnd = min(tcp_snd_cwnd(tp) + acked, tp->snd_ssthresh);
- acked -= cwnd - tp->snd_cwnd;
- tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
+ acked -= cwnd - tcp_snd_cwnd(tp);
+ tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp));
return acked;
}
@@ -410,7 +422,7 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
/* If credits accumulated at a higher w, apply them gently now. */
if (tp->snd_cwnd_cnt >= w) {
tp->snd_cwnd_cnt = 0;
- tp->snd_cwnd++;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
}
tp->snd_cwnd_cnt += acked;
@@ -418,9 +430,9 @@ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
u32 delta = tp->snd_cwnd_cnt / w;
tp->snd_cwnd_cnt -= delta * w;
- tp->snd_cwnd += delta;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + delta);
}
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp));
}
EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
@@ -445,7 +457,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
return;
}
/* In dangerous area, increase slowly. */
- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
}
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
@@ -454,7 +466,7 @@ u32 tcp_reno_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max(tp->snd_cwnd >> 1U, 2U);
+ return max(tcp_snd_cwnd(tp) >> 1U, 2U);
}
EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
@@ -462,7 +474,7 @@ u32 tcp_reno_undo_cwnd(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max(tp->snd_cwnd, tp->prior_cwnd);
+ return max(tcp_snd_cwnd(tp), tp->prior_cwnd);
}
EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);
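A short worked example of the additive-increase path converted above (numbers are illustrative, not from the diff): with w = tcp_snd_cwnd(tp) = 10 and acked = 25, tcp_cong_avoid_ai() adds 25 to snd_cwnd_cnt, computes delta = 25 / 10 = 2, grows cwnd from 10 to 12 via tcp_snd_cwnd_set(), and carries the remaining 5 in snd_cwnd_cnt for later ACKs. In slow start, tcp_slow_start() instead grows cwnd by at most ssthresh - cwnd per call and returns the leftover ACK count so the caller can feed it into the AI phase.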
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 24d562dd6225..68178e7280ce 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -334,7 +334,7 @@ static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!acked)
return;
}
- bictcp_update(ca, tp->snd_cwnd, acked);
+ bictcp_update(ca, tcp_snd_cwnd(tp), acked);
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
@@ -346,13 +346,13 @@ static u32 cubictcp_recalc_ssthresh(struct sock *sk)
ca->epoch_start = 0; /* end of epoch */
/* Wmax and fast convergence */
- if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence)
- ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta))
+ if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence)
+ ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta))
/ (2 * BICTCP_BETA_SCALE);
else
- ca->last_max_cwnd = tp->snd_cwnd;
+ ca->last_max_cwnd = tcp_snd_cwnd(tp);
- return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
+ return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U);
}
static void cubictcp_state(struct sock *sk, u8 new_state)
@@ -372,7 +372,7 @@ static void cubictcp_state(struct sock *sk, u8 new_state)
* We apply another 100% factor because @rate is doubled at this point.
* We cap the cushion to 1ms.
*/
-static u32 hystart_ack_delay(struct sock *sk)
+static u32 hystart_ack_delay(const struct sock *sk)
{
unsigned long rate;
@@ -380,7 +380,7 @@ static u32 hystart_ack_delay(struct sock *sk)
if (!rate)
return 0;
return min_t(u64, USEC_PER_MSEC,
- div64_ul((u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate));
+ div64_ul((u64)sk->sk_gso_max_size * 4 * USEC_PER_SEC, rate));
}
static void hystart_update(struct sock *sk, u32 delay)
@@ -413,13 +413,13 @@ static void hystart_update(struct sock *sk, u32 delay)
ca->found = 1;
pr_debug("hystart_ack_train (%u > %u) delay_min %u (+ ack_delay %u) cwnd %u\n",
now - ca->round_start, threshold,
- ca->delay_min, hystart_ack_delay(sk), tp->snd_cwnd);
+ ca->delay_min, hystart_ack_delay(sk), tcp_snd_cwnd(tp));
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINDETECT);
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINCWND,
- tp->snd_cwnd);
- tp->snd_ssthresh = tp->snd_cwnd;
+ tcp_snd_cwnd(tp));
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
}
}
}
@@ -438,8 +438,8 @@ static void hystart_update(struct sock *sk, u32 delay)
LINUX_MIB_TCPHYSTARTDELAYDETECT);
NET_ADD_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTDELAYCWND,
- tp->snd_cwnd);
- tp->snd_ssthresh = tp->snd_cwnd;
+ tcp_snd_cwnd(tp));
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
}
}
}
@@ -469,7 +469,7 @@ static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample)
/* hystart triggers when cwnd is larger than some threshold */
if (!ca->found && tcp_in_slow_start(tp) && hystart &&
- tp->snd_cwnd >= hystart_low_window)
+ tcp_snd_cwnd(tp) >= hystart_low_window)
hystart_update(sk, delay);
}
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 1943a6630341..ab034a4e9324 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -106,8 +106,8 @@ static u32 dctcp_ssthresh(struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
- ca->loss_cwnd = tp->snd_cwnd;
- return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
+ ca->loss_cwnd = tcp_snd_cwnd(tp);
+ return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * ca->dctcp_alpha) >> 11U), 2U);
}
static void dctcp_update_alpha(struct sock *sk, u32 flags)
@@ -148,8 +148,8 @@ static void dctcp_react_to_loss(struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
- ca->loss_cwnd = tp->snd_cwnd;
- tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
+ ca->loss_cwnd = tcp_snd_cwnd(tp);
+ tp->snd_ssthresh = max(tcp_snd_cwnd(tp) >> 1U, 2U);
}
static void dctcp_state(struct sock *sk, u8 new_state)
@@ -211,8 +211,9 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
static u32 dctcp_cwnd_undo(struct sock *sk)
{
const struct dctcp *ca = inet_csk_ca(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
- return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+ return max(tcp_snd_cwnd(tp), ca->loss_cwnd);
}
static struct tcp_congestion_ops dctcp __read_mostly = {
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 349069d6cd0a..c6de5ce79ad3 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -127,22 +127,22 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* snd_cwnd <=
* hstcp_aimd_vals[ca->ai].cwnd
*/
- if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
- while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
+ if (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd) {
+ while (tcp_snd_cwnd(tp) > hstcp_aimd_vals[ca->ai].cwnd &&
ca->ai < HSTCP_AIMD_MAX - 1)
ca->ai++;
- } else if (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd) {
- while (ca->ai && tp->snd_cwnd <= hstcp_aimd_vals[ca->ai-1].cwnd)
+ } else if (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd) {
+ while (ca->ai && tcp_snd_cwnd(tp) <= hstcp_aimd_vals[ca->ai-1].cwnd)
ca->ai--;
}
/* Do additive increase */
- if (tp->snd_cwnd < tp->snd_cwnd_clamp) {
+ if (tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) {
/* cwnd = cwnd + a(w) / cwnd */
tp->snd_cwnd_cnt += ca->ai + 1;
- if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
- tp->snd_cwnd_cnt -= tp->snd_cwnd;
- tp->snd_cwnd++;
+ if (tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) {
+ tp->snd_cwnd_cnt -= tcp_snd_cwnd(tp);
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
}
}
}
@@ -154,7 +154,7 @@ static u32 hstcp_ssthresh(struct sock *sk)
struct hstcp *ca = inet_csk_ca(sk);
/* Do multiplicative decrease */
- return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
+ return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
}
static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 55adcfcf96fe..52b1f2665dfa 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -124,7 +124,7 @@ static void measure_achieved_throughput(struct sock *sk,
ca->packetcount += sample->pkts_acked;
- if (ca->packetcount >= tp->snd_cwnd - (ca->alpha >> 7 ? : 1) &&
+ if (ca->packetcount >= tcp_snd_cwnd(tp) - (ca->alpha >> 7 ? : 1) &&
now - ca->lasttime >= ca->minRTT &&
ca->minRTT > 0) {
__u32 cur_Bi = ca->packetcount * HZ / (now - ca->lasttime);
@@ -225,7 +225,7 @@ static u32 htcp_recalc_ssthresh(struct sock *sk)
const struct htcp *ca = inet_csk_ca(sk);
htcp_param_update(sk);
- return max((tp->snd_cwnd * ca->beta) >> 7, 2U);
+ return max((tcp_snd_cwnd(tp) * ca->beta) >> 7, 2U);
}
static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
@@ -242,9 +242,9 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
/* In dangerous area, increase slowly.
* In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
*/
- if ((tp->snd_cwnd_cnt * ca->alpha)>>7 >= tp->snd_cwnd) {
- if (tp->snd_cwnd < tp->snd_cwnd_clamp)
- tp->snd_cwnd++;
+ if ((tp->snd_cwnd_cnt * ca->alpha)>>7 >= tcp_snd_cwnd(tp)) {
+ if (tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp)
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
tp->snd_cwnd_cnt = 0;
htcp_alpha_update(ca);
} else
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index be39327e04e6..abd7d91807e5 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -54,7 +54,7 @@ static void hybla_init(struct sock *sk)
ca->rho2_7ls = 0;
ca->snd_cwnd_cents = 0;
ca->hybla_en = true;
- tp->snd_cwnd = 2;
+ tcp_snd_cwnd_set(tp, 2);
tp->snd_cwnd_clamp = 65535;
/* 1st Rho measurement based on initial srtt */
@@ -62,7 +62,7 @@ static void hybla_init(struct sock *sk)
/* set minimum rtt as this is the 1st ever seen */
ca->minrtt_us = tp->srtt_us;
- tp->snd_cwnd = ca->rho;
+ tcp_snd_cwnd_set(tp, ca->rho);
}
static void hybla_state(struct sock *sk, u8 ca_state)
@@ -137,31 +137,31 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* as long as increment is estimated as (rho<<7)/window
* it already is <<7 and we can easily count its fractions.
*/
- increment = ca->rho2_7ls / tp->snd_cwnd;
+ increment = ca->rho2_7ls / tcp_snd_cwnd(tp);
if (increment < 128)
tp->snd_cwnd_cnt++;
}
odd = increment % 128;
- tp->snd_cwnd += increment >> 7;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + (increment >> 7));
ca->snd_cwnd_cents += odd;
/* check when fractions goes >=128 and increase cwnd by 1. */
while (ca->snd_cwnd_cents >= 128) {
- tp->snd_cwnd++;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
ca->snd_cwnd_cents -= 128;
tp->snd_cwnd_cnt = 0;
}
/* check when cwnd has not been incremented for a while */
- if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tp->snd_cwnd) {
- tp->snd_cwnd++;
+ if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) {
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
tp->snd_cwnd_cnt = 0;
}
/* clamp down slowstart cwnd to ssthresh value. */
if (is_slowstart)
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_ssthresh));
- tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp));
}
static struct tcp_congestion_ops tcp_hybla __read_mostly = {
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 00e54873213e..c0c81a2c77fa 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -224,7 +224,7 @@ static void update_params(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct illinois *ca = inet_csk_ca(sk);
- if (tp->snd_cwnd < win_thresh) {
+ if (tcp_snd_cwnd(tp) < win_thresh) {
ca->alpha = ALPHA_BASE;
ca->beta = BETA_BASE;
} else if (ca->cnt_rtt > 0) {
@@ -284,9 +284,9 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* tp->snd_cwnd += alpha/tp->snd_cwnd
*/
delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
- if (delta >= tp->snd_cwnd) {
- tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd,
- (u32)tp->snd_cwnd_clamp);
+ if (delta >= tcp_snd_cwnd(tp)) {
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp) + delta / tcp_snd_cwnd(tp),
+ (u32)tp->snd_cwnd_clamp));
tp->snd_cwnd_cnt = 0;
}
}
@@ -296,9 +296,11 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct illinois *ca = inet_csk_ca(sk);
+ u32 decr;
/* Multiplicative decrease */
- return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
+ decr = (tcp_snd_cwnd(tp) * ca->beta) >> BETA_SHIFT;
+ return max(tcp_snd_cwnd(tp) - decr, 2U);
}
/* Extract info for Tcp socket info provided via netlink. */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 60f99e9fb6d1..97cfcd85f84e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -414,7 +414,7 @@ static void tcp_sndbuf_expand(struct sock *sk)
per_mss = roundup_pow_of_two(per_mss) +
SKB_DATA_ALIGN(sizeof(struct sk_buff));
- nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
+ nr_segs = max_t(u32, TCP_INIT_CWND, tcp_snd_cwnd(tp));
nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
/* Fast Recovery (RFC 5681 3.2) :
@@ -909,12 +909,12 @@ static void tcp_update_pacing_rate(struct sock *sk)
* If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching
* end of slow start and should slow down.
*/
- if (tp->snd_cwnd < tp->snd_ssthresh / 2)
+ if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2)
rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
else
rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
- rate *= max(tp->snd_cwnd, tp->packets_out);
+ rate *= max(tcp_snd_cwnd(tp), tp->packets_out);
if (likely(tp->srtt_us))
do_div(rate, tp->srtt_us);
@@ -2147,12 +2147,12 @@ void tcp_enter_loss(struct sock *sk)
!after(tp->high_seq, tp->snd_una) ||
(icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
tp->prior_ssthresh = tcp_current_ssthresh(sk);
- tp->prior_cwnd = tp->snd_cwnd;
+ tp->prior_cwnd = tcp_snd_cwnd(tp);
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tcp_ca_event(sk, CA_EVENT_LOSS);
tcp_init_undo(tp);
}
- tp->snd_cwnd = tcp_packets_in_flight(tp) + 1;
+ tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + 1);
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_jiffies32;
@@ -2458,7 +2458,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
pr_debug("Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
msg,
&inet->inet_daddr, ntohs(inet->inet_dport),
- tp->snd_cwnd, tcp_left_out(tp),
+ tcp_snd_cwnd(tp), tcp_left_out(tp),
tp->snd_ssthresh, tp->prior_ssthresh,
tp->packets_out);
}
@@ -2467,7 +2467,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
msg,
&sk->sk_v6_daddr, ntohs(inet->inet_dport),
- tp->snd_cwnd, tcp_left_out(tp),
+ tcp_snd_cwnd(tp), tcp_left_out(tp),
tp->snd_ssthresh, tp->prior_ssthresh,
tp->packets_out);
}
@@ -2492,7 +2492,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
if (tp->prior_ssthresh) {
const struct inet_connection_sock *icsk = inet_csk(sk);
- tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
+ tcp_snd_cwnd_set(tp, icsk->icsk_ca_ops->undo_cwnd(sk));
if (tp->prior_ssthresh > tp->snd_ssthresh) {
tp->snd_ssthresh = tp->prior_ssthresh;
@@ -2599,7 +2599,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk)
tp->high_seq = tp->snd_nxt;
tp->tlp_high_seq = 0;
tp->snd_cwnd_cnt = 0;
- tp->prior_cwnd = tp->snd_cwnd;
+ tp->prior_cwnd = tcp_snd_cwnd(tp);
tp->prr_delivered = 0;
tp->prr_out = 0;
tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
@@ -2629,7 +2629,7 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost,
}
/* Force a fast retransmit upon entering fast recovery */
sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
- tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
+ tcp_snd_cwnd_set(tp, tcp_packets_in_flight(tp) + sndcnt);
}
static inline void tcp_end_cwnd_reduction(struct sock *sk)
@@ -2642,7 +2642,7 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
(inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
- tp->snd_cwnd = tp->snd_ssthresh;
+ tcp_snd_cwnd_set(tp, tp->snd_ssthresh);
tp->snd_cwnd_stamp = tcp_jiffies32;
}
tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
@@ -2709,9 +2709,9 @@ static void tcp_mtup_probe_success(struct sock *sk)
/* FIXME: breaks with very large cwnd */
tp->prior_ssthresh = tcp_current_ssthresh(sk);
- tp->snd_cwnd = tp->snd_cwnd *
- tcp_mss_to_mtu(sk, tp->mss_cache) /
- icsk->icsk_mtup.probe_size;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) *
+ tcp_mss_to_mtu(sk, tp->mss_cache) /
+ icsk->icsk_mtup.probe_size);
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_jiffies32;
tp->snd_ssthresh = tcp_current_ssthresh(sk);
@@ -3034,7 +3034,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
tp->snd_una == tp->mtu_probe.probe_seq_start) {
tcp_mtup_probe_failed(sk);
/* Restores the reduction we did in tcp_mtup_probe() */
- tp->snd_cwnd++;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
tcp_simple_retransmit(sk);
return;
}
@@ -3766,7 +3766,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (before(ack, prior_snd_una - tp->max_window)) {
if (!(flag & FLAG_NO_CHALLENGE_ACK))
tcp_send_challenge_ack(sk);
- return -1;
+ return -SKB_DROP_REASON_TCP_TOO_OLD_ACK;
}
goto old_ack;
}
@@ -3775,7 +3775,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
* this segment (RFC793 Section 3.9).
*/
if (after(ack, tp->snd_nxt))
- return -1;
+ return -SKB_DROP_REASON_TCP_ACK_UNSENT_DATA;
if (after(ack, prior_snd_una)) {
flag |= FLAG_SND_UNA_ADVANCED;
@@ -4675,7 +4675,7 @@ static bool tcp_ooo_try_coalesce(struct sock *sk,
{
bool res = tcp_try_coalesce(sk, to, from, fragstolen);
- /* In case tcp_drop() is called later, update to->gso_segs */
+ /* In case tcp_drop_reason() is called later, update to->gso_segs */
if (res) {
u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
max_t(u16, 1, skb_shinfo(from)->gso_segs);
@@ -4692,11 +4692,6 @@ static void tcp_drop_reason(struct sock *sk, struct sk_buff *skb,
kfree_skb_reason(skb, reason);
}
-static void tcp_drop(struct sock *sk, struct sk_buff *skb)
-{
- tcp_drop_reason(sk, skb, SKB_DROP_REASON_NOT_SPECIFIED);
-}
-
/* This one checks to see if we can put data from the
* out_of_order queue into the receive_queue.
*/
@@ -4724,7 +4719,7 @@ static void tcp_ofo_queue(struct sock *sk)
rb_erase(&skb->rbnode, &tp->out_of_order_queue);
if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_DROP);
continue;
}
@@ -5335,7 +5330,8 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
prev = rb_prev(node);
rb_erase(node, &tp->out_of_order_queue);
goal -= rb_to_skb(node)->truesize;
- tcp_drop(sk, rb_to_skb(node));
+ tcp_drop_reason(sk, rb_to_skb(node),
+ SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE);
if (!prev || goal <= 0) {
sk_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
@@ -5437,7 +5433,7 @@ static bool tcp_should_expand_sndbuf(struct sock *sk)
return false;
/* If we filled the congestion window, do not expand. */
- if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
+ if (tcp_packets_in_flight(tp) >= tcp_snd_cwnd(tp))
return false;
return true;
@@ -5678,7 +5674,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th, int syn_inerr)
{
struct tcp_sock *tp = tcp_sk(sk);
- bool rst_seq_match = false;
+ SKB_DR(reason);
/* RFC1323: H1. Apply PAWS check first. */
if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
@@ -5690,6 +5686,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
LINUX_MIB_TCPACKSKIPPEDPAWS,
&tp->last_oow_ack_time))
tcp_send_dupack(sk, skb);
+ SKB_DR_SET(reason, TCP_RFC7323_PAWS);
goto discard;
}
/* Reset is accepted even if it did not pass PAWS. */
@@ -5711,8 +5708,9 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
&tp->last_oow_ack_time))
tcp_send_dupack(sk, skb);
} else if (tcp_reset_check(sk, skb)) {
- tcp_reset(sk, skb);
+ goto reset;
}
+ SKB_DR_SET(reason, TCP_INVALID_SEQUENCE);
goto discard;
}
@@ -5728,9 +5726,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
* Send a challenge ACK
*/
if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt ||
- tcp_reset_check(sk, skb)) {
- rst_seq_match = true;
- } else if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
+ tcp_reset_check(sk, skb))
+ goto reset;
+
+ if (tcp_is_sack(tp) && tp->rx_opt.num_sacks > 0) {
struct tcp_sack_block *sp = &tp->selective_acks[0];
int max_sack = sp[0].end_seq;
int this_sack;
@@ -5743,21 +5742,18 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
}
if (TCP_SKB_CB(skb)->seq == max_sack)
- rst_seq_match = true;
+ goto reset;
}
- if (rst_seq_match)
- tcp_reset(sk, skb);
- else {
- /* Disable TFO if RST is out-of-order
- * and no data has been received
- * for current active TFO socket
- */
- if (tp->syn_fastopen && !tp->data_segs_in &&
- sk->sk_state == TCP_ESTABLISHED)
- tcp_fastopen_active_disable(sk);
- tcp_send_challenge_ack(sk);
- }
+ /* Disable TFO if RST is out-of-order
+ * and no data has been received
+ * for current active TFO socket
+ */
+ if (tp->syn_fastopen && !tp->data_segs_in &&
+ sk->sk_state == TCP_ESTABLISHED)
+ tcp_fastopen_active_disable(sk);
+ tcp_send_challenge_ack(sk);
+ SKB_DR_SET(reason, TCP_RESET);
goto discard;
}
@@ -5772,6 +5768,7 @@ syn_challenge:
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
tcp_send_challenge_ack(sk);
+ SKB_DR_SET(reason, TCP_INVALID_SYN);
goto discard;
}
@@ -5780,7 +5777,12 @@ syn_challenge:
return true;
discard:
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb, reason);
+ return false;
+
+reset:
+ tcp_reset(sk, skb);
+ __kfree_skb(skb);
return false;
}
@@ -5926,6 +5928,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
/* Bulk data transfer: receiver */
+ skb_dst_drop(skb);
__skb_pull(skb, tcp_header_len);
eaten = tcp_queue_rcv(sk, skb, &fragstolen);
@@ -5967,9 +5970,11 @@ slow_path:
return;
step5:
- if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
+ reason = tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT);
+ if ((int)reason < 0) {
+ reason = -reason;
goto discard;
-
+ }
tcp_rcv_rtt_measure_ts(sk, skb);
/* Process urgent data. */
@@ -6009,9 +6014,9 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
* retransmission has occurred.
*/
if (tp->total_retrans > 1 && tp->undo_marker)
- tp->snd_cwnd = 1;
+ tcp_snd_cwnd_set(tp, 1);
else
- tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
+ tcp_snd_cwnd_set(tp, tcp_init_cwnd(tp, __sk_dst_get(sk)));
tp->snd_cwnd_stamp = tcp_jiffies32;
bpf_skops_established(sk, bpf_op, skb);
@@ -6147,6 +6152,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
struct tcp_fastopen_cookie foc = { .len = -1 };
int saved_clamp = tp->rx_opt.mss_clamp;
bool fastopen_fail;
+ SKB_DR(reason);
tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
@@ -6189,7 +6195,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
if (th->rst) {
tcp_reset(sk, skb);
- goto discard;
+consume:
+ __kfree_skb(skb);
+ return 0;
}
/* rfc793:
@@ -6199,9 +6207,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* See note below!
* --ANK(990513)
*/
- if (!th->syn)
+ if (!th->syn) {
+ SKB_DR_SET(reason, TCP_FLAGS);
goto discard_and_undo;
-
+ }
/* rfc793:
* "If the SYN bit is on ...
* are acceptable then ...
@@ -6278,13 +6287,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
TCP_DELACK_MAX, TCP_RTO_MAX);
-
-discard:
- tcp_drop(sk, skb);
- return 0;
- } else {
- tcp_send_ack(sk);
+ goto consume;
}
+ tcp_send_ack(sk);
return -1;
}
@@ -6296,15 +6301,16 @@ discard:
*
* Otherwise (no ACK) drop the segment and return."
*/
-
+ SKB_DR_SET(reason, TCP_RESET);
goto discard_and_undo;
}
/* PAWS check. */
if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
- tcp_paws_reject(&tp->rx_opt, 0))
+ tcp_paws_reject(&tp->rx_opt, 0)) {
+ SKB_DR_SET(reason, TCP_RFC7323_PAWS);
goto discard_and_undo;
-
+ }
if (th->syn) {
/* We see SYN without ACK. It is attempt of
* simultaneous connect with crossed SYNs.
@@ -6353,7 +6359,7 @@ discard:
*/
return -1;
#else
- goto discard;
+ goto consume;
#endif
}
/* "fifth, if neither of the SYN or RST bits is set then
@@ -6363,7 +6369,8 @@ discard:
discard_and_undo:
tcp_clear_options(&tp->rx_opt);
tp->rx_opt.mss_clamp = saved_clamp;
- goto discard;
+ tcp_drop_reason(sk, skb, reason);
+ return 0;
reset_and_undo:
tcp_clear_options(&tp->rx_opt);
@@ -6418,21 +6425,26 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
struct request_sock *req;
int queued = 0;
bool acceptable;
+ SKB_DR(reason);
switch (sk->sk_state) {
case TCP_CLOSE:
+ SKB_DR_SET(reason, TCP_CLOSE);
goto discard;
case TCP_LISTEN:
if (th->ack)
return 1;
- if (th->rst)
+ if (th->rst) {
+ SKB_DR_SET(reason, TCP_RESET);
goto discard;
-
+ }
if (th->syn) {
- if (th->fin)
+ if (th->fin) {
+ SKB_DR_SET(reason, TCP_FLAGS);
goto discard;
+ }
/* It is possible that we process SYN packets from backlog,
* so we need to make sure to disable BH and RCU right there.
*/
@@ -6447,6 +6459,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
consume_skb(skb);
return 0;
}
+ SKB_DR_SET(reason, TCP_FLAGS);
goto discard;
case TCP_SYN_SENT:
@@ -6473,13 +6486,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1);
- if (!tcp_check_req(sk, skb, req, true, &req_stolen))
+ if (!tcp_check_req(sk, skb, req, true, &req_stolen)) {
+ SKB_DR_SET(reason, TCP_FASTOPEN);
goto discard;
+ }
}
- if (!th->ack && !th->rst && !th->syn)
+ if (!th->ack && !th->rst && !th->syn) {
+ SKB_DR_SET(reason, TCP_FLAGS);
goto discard;
-
+ }
if (!tcp_validate_incoming(sk, skb, th, 0))
return 0;
@@ -6492,6 +6508,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (sk->sk_state == TCP_SYN_RECV)
return 1; /* send one RST */
tcp_send_challenge_ack(sk);
+ SKB_DR_SET(reason, TCP_OLD_ACK);
goto discard;
}
switch (sk->sk_state) {
@@ -6585,7 +6602,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
inet_csk_reset_keepalive_timer(sk, tmo);
} else {
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
- goto discard;
+ goto consume;
}
break;
}
@@ -6593,7 +6610,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_CLOSING:
if (tp->snd_una == tp->write_seq) {
tcp_time_wait(sk, TCP_TIME_WAIT, 0);
- goto discard;
+ goto consume;
}
break;
@@ -6601,7 +6618,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (tp->snd_una == tp->write_seq) {
tcp_update_metrics(sk);
tcp_done(sk);
- goto discard;
+ goto consume;
}
break;
}
@@ -6652,9 +6669,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (!queued) {
discard:
- tcp_drop(sk, skb);
+ tcp_drop_reason(sk, skb, reason);
}
return 0;
+
+consume:
+ __kfree_skb(skb);
+ return 0;
}
EXPORT_SYMBOL(tcp_rcv_state_process);
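The tcp_input.c hunks replace the catch-all tcp_drop() with tcp_drop_reason() and thread a per-function drop reason through the discard paths, while genuinely processed packets (reset handling, TIME_WAIT transitions) switch to __kfree_skb() via the new consume label so they are not reported as drops. The SKB_DR*() helpers used for this are defined outside this diff; they are assumed to expand roughly as follows:

	/* Assumed expansion, for reading the hunks above; not part of this diff. */
	#define SKB_DR(name) \
		enum skb_drop_reason name = SKB_DROP_REASON_NOT_SPECIFIED
	#define SKB_DR_SET(name, reason) \
		(name = SKB_DROP_REASON_##reason)
	#define SKB_DR_OR(name, reason) \
		do { \
			if (name == SKB_DROP_REASON_NOT_SPECIFIED) \
				SKB_DR_SET(name, reason); \
		} while (0)

Under that reading, SKB_DR_SET(reason, TCP_RFC7323_PAWS) stores SKB_DROP_REASON_TCP_RFC7323_PAWS, and SKB_DR_OR() only fills in a reason when nothing more specific was recorded earlier.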
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 457f5b5d5d4a..dac2650f3863 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -229,9 +229,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
orig_dport = usin->sin_port;
fl4 = &inet->cork.fl.u.ip4;
rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
- IPPROTO_TCP,
- orig_sport, orig_dport, sk);
+ sk->sk_bound_dev_if, IPPROTO_TCP, orig_sport,
+ orig_dport, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
if (err == -ENETUNREACH)
@@ -2066,7 +2065,6 @@ process:
sk_incoming_cpu_update(sk);
- sk_defer_free_flush(sk);
bh_lock_sock_nested(sk);
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
@@ -2103,6 +2101,7 @@ bad_packet:
}
discard_it:
+ SKB_DR_OR(drop_reason, NOT_SPECIFIED);
/* Discard frame. */
kfree_skb_reason(skb, drop_reason);
return 0;
@@ -2285,16 +2284,15 @@ static void *listening_get_first(struct seq_file *seq)
st->offset = 0;
for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) {
struct inet_listen_hashbucket *ilb2;
- struct inet_connection_sock *icsk;
+ struct hlist_nulls_node *node;
struct sock *sk;
ilb2 = &tcp_hashinfo.lhash2[st->bucket];
- if (hlist_empty(&ilb2->head))
+ if (hlist_nulls_empty(&ilb2->nulls_head))
continue;
spin_lock(&ilb2->lock);
- inet_lhash2_for_each_icsk(icsk, &ilb2->head) {
- sk = (struct sock *)icsk;
+ sk_nulls_for_each(sk, node, &ilb2->nulls_head) {
if (seq_sk_match(seq, sk))
return sk;
}
@@ -2313,15 +2311,14 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
{
struct tcp_iter_state *st = seq->private;
struct inet_listen_hashbucket *ilb2;
- struct inet_connection_sock *icsk;
+ struct hlist_nulls_node *node;
struct sock *sk = cur;
++st->num;
++st->offset;
- icsk = inet_csk(sk);
- inet_lhash2_for_each_icsk_continue(icsk) {
- sk = (struct sock *)icsk;
+ sk = sk_nulls_next(sk);
+ sk_nulls_for_each_from(sk, node) {
if (seq_sk_match(seq, sk))
return sk;
}
@@ -2621,7 +2618,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk),
- tp->snd_cwnd,
+ tcp_snd_cwnd(tp),
state == TCP_LISTEN ?
fastopenq->max_qlen :
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
@@ -2730,16 +2727,15 @@ static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
{
struct bpf_tcp_iter_state *iter = seq->private;
struct tcp_iter_state *st = &iter->state;
- struct inet_connection_sock *icsk;
+ struct hlist_nulls_node *node;
unsigned int expected = 1;
struct sock *sk;
sock_hold(start_sk);
iter->batch[iter->end_sk++] = start_sk;
- icsk = inet_csk(start_sk);
- inet_lhash2_for_each_icsk_continue(icsk) {
- sk = (struct sock *)icsk;
+ sk = sk_nulls_next(start_sk);
+ sk_nulls_for_each_from(sk, node) {
if (seq_sk_match(seq, sk)) {
if (iter->end_sk < iter->max_sk) {
sock_hold(sk);
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index 82b36ec3f2f8..ae36780977d2 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -297,7 +297,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
lp->flag &= ~LP_WITHIN_THR;
pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag,
- tp->snd_cwnd, lp->remote_hz, lp->owd_min, lp->owd_max,
+ tcp_snd_cwnd(tp), lp->remote_hz, lp->owd_min, lp->owd_max,
lp->sowd >> 3);
if (lp->flag & LP_WITHIN_THR)
@@ -313,12 +313,12 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
/* happened within inference
* drop snd_cwnd into 1 */
if (lp->flag & LP_WITHIN_INF)
- tp->snd_cwnd = 1U;
+ tcp_snd_cwnd_set(tp, 1U);
/* happened after inference
* cut snd_cwnd into half */
else
- tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U);
+ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp) >> 1U, 1U));
/* record this drop time */
lp->last_drop = now;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 0588b004ddac..7029b0e98edb 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -388,15 +388,15 @@ void tcp_update_metrics(struct sock *sk)
if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
- if (val && (tp->snd_cwnd >> 1) > val)
+ if (val && (tcp_snd_cwnd(tp) >> 1) > val)
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
- tp->snd_cwnd >> 1);
+ tcp_snd_cwnd(tp) >> 1);
}
if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
val = tcp_metric_get(tm, TCP_METRIC_CWND);
- if (tp->snd_cwnd > val)
+ if (tcp_snd_cwnd(tp) > val)
tcp_metric_set(tm, TCP_METRIC_CWND,
- tp->snd_cwnd);
+ tcp_snd_cwnd(tp));
}
} else if (!tcp_in_slow_start(tp) &&
icsk->icsk_ca_state == TCP_CA_Open) {
@@ -404,10 +404,10 @@ void tcp_update_metrics(struct sock *sk)
if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
- max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
+ max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh));
if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
val = tcp_metric_get(tm, TCP_METRIC_CWND);
- tcp_metric_set(tm, TCP_METRIC_CWND, (val + tp->snd_cwnd) >> 1);
+ tcp_metric_set(tm, TCP_METRIC_CWND, (val + tcp_snd_cwnd(tp)) >> 1);
}
} else {
/* Else slow start did not finish, cwnd is non-sense,
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index ab552356bdba..a60662f4bdf9 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -197,10 +197,10 @@ static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
}
if (ca->cwnd_growth_factor < 0) {
- cnt = tp->snd_cwnd << -ca->cwnd_growth_factor;
+ cnt = tcp_snd_cwnd(tp) << -ca->cwnd_growth_factor;
tcp_cong_avoid_ai(tp, cnt, acked);
} else {
- cnt = max(4U, tp->snd_cwnd >> ca->cwnd_growth_factor);
+ cnt = max(4U, tcp_snd_cwnd(tp) >> ca->cwnd_growth_factor);
tcp_cong_avoid_ai(tp, cnt, acked);
}
}
@@ -209,7 +209,7 @@ static u32 tcpnv_recalc_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max((tp->snd_cwnd * nv_loss_dec_factor) >> 10, 2U);
+ return max((tcp_snd_cwnd(tp) * nv_loss_dec_factor) >> 10, 2U);
}
static void tcpnv_state(struct sock *sk, u8 new_state)
@@ -257,7 +257,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
return;
/* Stop cwnd growth if we were in catch up mode */
- if (ca->nv_catchup && tp->snd_cwnd >= nv_min_cwnd) {
+ if (ca->nv_catchup && tcp_snd_cwnd(tp) >= nv_min_cwnd) {
ca->nv_catchup = 0;
ca->nv_allow_cwnd_growth = 0;
}
@@ -371,7 +371,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
* if cwnd < max_win, grow cwnd
* else leave the same
*/
- if (tp->snd_cwnd > max_win) {
+ if (tcp_snd_cwnd(tp) > max_win) {
/* there is congestion, check that it is ok
* to make a CA decision
* 1. We should have at least nv_dec_eval_min_calls
@@ -398,20 +398,20 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
ca->nv_allow_cwnd_growth = 0;
tp->snd_ssthresh =
(nv_ssthresh_factor * max_win) >> 3;
- if (tp->snd_cwnd - max_win > 2) {
+ if (tcp_snd_cwnd(tp) - max_win > 2) {
/* gap > 2, we do exponential cwnd decrease */
int dec;
- dec = max(2U, ((tp->snd_cwnd - max_win) *
+ dec = max(2U, ((tcp_snd_cwnd(tp) - max_win) *
nv_cong_dec_mult) >> 7);
- tp->snd_cwnd -= dec;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - dec);
} else if (nv_cong_dec_mult > 0) {
- tp->snd_cwnd = max_win;
+ tcp_snd_cwnd_set(tp, max_win);
}
if (ca->cwnd_growth_factor > 0)
ca->cwnd_growth_factor = 0;
ca->nv_no_cong_cnt = 0;
- } else if (tp->snd_cwnd <= max_win - nv_pad_buffer) {
+ } else if (tcp_snd_cwnd(tp) <= max_win - nv_pad_buffer) {
/* There is no congestion, grow cwnd if allowed*/
if (ca->nv_eval_call_cnt < nv_inc_eval_min_calls)
return;
@@ -444,8 +444,8 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
* (it wasn't before, if it is now is because nv
* decreased it).
*/
- if (tp->snd_cwnd < nv_min_cwnd)
- tp->snd_cwnd = nv_min_cwnd;
+ if (tcp_snd_cwnd(tp) < nv_min_cwnd)
+ tcp_snd_cwnd_set(tp, nv_min_cwnd);
}
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1ca2f28c9981..b4b2284ed4a2 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -143,7 +143,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
- u32 cwnd = tp->snd_cwnd;
+ u32 cwnd = tcp_snd_cwnd(tp);
tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
@@ -152,7 +152,7 @@ void tcp_cwnd_restart(struct sock *sk, s32 delta)
while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
cwnd >>= 1;
- tp->snd_cwnd = max(cwnd, restart_cwnd);
+ tcp_snd_cwnd_set(tp, max(cwnd, restart_cwnd));
tp->snd_cwnd_stamp = tcp_jiffies32;
tp->snd_cwnd_used = 0;
}
@@ -445,12 +445,13 @@ struct tcp_out_options {
struct mptcp_out_options mptcp;
};
-static void mptcp_options_write(__be32 *ptr, const struct tcp_sock *tp,
+static void mptcp_options_write(struct tcphdr *th, __be32 *ptr,
+ struct tcp_sock *tp,
struct tcp_out_options *opts)
{
#if IS_ENABLED(CONFIG_MPTCP)
if (unlikely(OPTION_MPTCP & opts->options))
- mptcp_write_options(ptr, tp, &opts->mptcp);
+ mptcp_write_options(th, ptr, tp, &opts->mptcp);
#endif
}
@@ -606,9 +607,10 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
* At least SACK_PERM as the first option is known to lead to a disaster
* (but it may well be that other scenarios fail similarly).
*/
-static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
+static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp,
struct tcp_out_options *opts)
{
+ __be32 *ptr = (__be32 *)(th + 1);
u16 options = opts->options; /* mungable copy */
if (unlikely(OPTION_MD5 & options)) {
@@ -702,7 +704,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
smc_options_write(ptr, &options);
- mptcp_options_write(ptr, tp, opts);
+ mptcp_options_write(th, ptr, tp, opts);
}
static void smc_set_option(const struct tcp_sock *tp,
@@ -1014,7 +1016,7 @@ static void tcp_tsq_write(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (tp->lost_out > tp->retrans_out &&
- tp->snd_cwnd > tcp_packets_in_flight(tp)) {
+ tcp_snd_cwnd(tp) > tcp_packets_in_flight(tp)) {
tcp_mstamp_refresh(tp);
tcp_xmit_retransmit_queue(sk);
}
@@ -1355,7 +1357,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
th->window = htons(min(tp->rcv_wnd, 65535U));
}
- tcp_options_write((__be32 *)(th + 1), tp, &opts);
+ tcp_options_write(th, tp, &opts);
#ifdef CONFIG_TCP_MD5SIG
/* Calculate the MD5 hash, as we have all we need now */
@@ -1551,7 +1553,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
* SO_SNDBUF values.
* Also allow first and last skb in retransmit queue to be split.
*/
- limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
+ limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_LEGACY_MAX_SIZE);
if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
skb != tcp_rtx_queue_head(sk) &&
@@ -1861,9 +1863,9 @@ static void tcp_cwnd_application_limited(struct sock *sk)
/* Limited by application or receiver window. */
u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
u32 win_used = max(tp->snd_cwnd_used, init_win);
- if (win_used < tp->snd_cwnd) {
+ if (win_used < tcp_snd_cwnd(tp)) {
tp->snd_ssthresh = tcp_current_ssthresh(sk);
- tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
+ tcp_snd_cwnd_set(tp, (tcp_snd_cwnd(tp) + win_used) >> 1);
}
tp->snd_cwnd_used = 0;
}
@@ -2044,7 +2046,7 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
return 1;
in_flight = tcp_packets_in_flight(tp);
- cwnd = tp->snd_cwnd;
+ cwnd = tcp_snd_cwnd(tp);
if (in_flight >= cwnd)
return 0;
@@ -2197,12 +2199,12 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
in_flight = tcp_packets_in_flight(tp);
BUG_ON(tcp_skb_pcount(skb) <= 1);
- BUG_ON(tp->snd_cwnd <= in_flight);
+ BUG_ON(tcp_snd_cwnd(tp) <= in_flight);
send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
/* From in_flight test above, we know that cwnd > in_flight. */
- cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
+ cong_win = (tcp_snd_cwnd(tp) - in_flight) * tp->mss_cache;
limit = min(send_win, cong_win);
@@ -2216,7 +2218,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
if (win_divisor) {
- u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
+ u32 chunk = min(tp->snd_wnd, tcp_snd_cwnd(tp) * tp->mss_cache);
/* If at least some fraction of a window is available,
* just use it.
@@ -2346,7 +2348,7 @@ static int tcp_mtu_probe(struct sock *sk)
if (likely(!icsk->icsk_mtup.enabled ||
icsk->icsk_mtup.probe_size ||
inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
- tp->snd_cwnd < 11 ||
+ tcp_snd_cwnd(tp) < 11 ||
tp->rx_opt.num_sacks || tp->rx_opt.dsack))
return -1;
@@ -2382,7 +2384,7 @@ static int tcp_mtu_probe(struct sock *sk)
return 0;
/* Do we need to wait to drain cwnd? With none in flight, don't stall */
- if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
+ if (tcp_packets_in_flight(tp) + 2 > tcp_snd_cwnd(tp)) {
if (!tcp_packets_in_flight(tp))
return -1;
else
@@ -2451,7 +2453,7 @@ static int tcp_mtu_probe(struct sock *sk)
if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
/* Decrement cwnd here because we are sending
* effectively two packets. */
- tp->snd_cwnd--;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1);
tcp_event_new_data_sent(sk, nskb);
icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
@@ -2709,7 +2711,7 @@ repair:
else
tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED);
- is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
+ is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tcp_snd_cwnd(tp));
if (likely(sent_pkts || is_cwnd_limited))
tcp_cwnd_validate(sk, is_cwnd_limited);
@@ -2819,7 +2821,7 @@ void tcp_send_loss_probe(struct sock *sk)
if (unlikely(!skb)) {
WARN_ONCE(tp->packets_out,
"invalid inflight: %u state %u cwnd %u mss %d\n",
- tp->packets_out, sk->sk_state, tp->snd_cwnd, mss);
+ tp->packets_out, sk->sk_state, tcp_snd_cwnd(tp), mss);
inet_csk(sk)->icsk_pending = 0;
return;
}
@@ -3303,7 +3305,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if (!hole)
tp->retransmit_skb_hint = skb;
- segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
+ segs = tcp_snd_cwnd(tp) - tcp_packets_in_flight(tp);
if (segs <= 0)
break;
sacked = TCP_SKB_CB(skb)->sacked;
@@ -3591,7 +3593,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rsk_rcv_wnd, 65535U));
- tcp_options_write((__be32 *)(th + 1), NULL, &opts);
+ tcp_options_write(th, NULL, &opts);
th->doff = (tcp_header_size >> 2);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
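In the tcp_output.c hunks, tcp_options_write() and mptcp_options_write() now take the struct tcphdr * instead of a raw pointer into the option area; the option pointer is derived internally as (__be32 *)(th + 1), i.e. the options still start immediately after the fixed TCP header. The call sites shrink accordingly (before/after, taken from the hunks above):

	/* before */ tcp_options_write((__be32 *)(th + 1), tp, &opts);
	/* after  */ tcp_options_write(th, tp, &opts);

Passing the header down presumably lets the MPTCP option writer adjust header fields as well, rather than only the option bytes.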
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 9a8e014d9b5b..a8f6d9d06f2e 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -200,7 +200,7 @@ void tcp_rate_check_app_limited(struct sock *sk)
/* Nothing in sending host's qdisc queues or NIC tx queue. */
sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) &&
/* We are not limited by CWND. */
- tcp_packets_in_flight(tp) < tp->snd_cwnd &&
+ tcp_packets_in_flight(tp) < tcp_snd_cwnd(tp) &&
/* All lost packets have been retransmitted. */
tp->lost_out <= tp->retrans_out)
tp->app_limited =
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index fd113f6226ef..48f30e7209f2 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -2,11 +2,6 @@
#include <linux/tcp.h>
#include <net/tcp.h>
-static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
-{
- return t1 > t2 || (t1 == t2 && after(seq1, seq2));
-}
-
static u32 tcp_rack_reo_wnd(const struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -77,9 +72,9 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
!(scb->sacked & TCPCB_SACKED_RETRANS))
continue;
- if (!tcp_rack_sent_after(tp->rack.mstamp,
- tcp_skb_timestamp_us(skb),
- tp->rack.end_seq, scb->end_seq))
+ if (!tcp_skb_sent_after(tp->rack.mstamp,
+ tcp_skb_timestamp_us(skb),
+ tp->rack.end_seq, scb->end_seq))
break;
/* A packet is lost if it has not been s/acked beyond
@@ -140,8 +135,8 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
}
tp->rack.advanced = 1;
tp->rack.rtt_us = rtt_us;
- if (tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
- end_seq, tp->rack.end_seq)) {
+ if (tcp_skb_sent_after(xmit_time, tp->rack.mstamp,
+ end_seq, tp->rack.end_seq)) {
tp->rack.mstamp = xmit_time;
tp->rack.end_seq = end_seq;
}
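The tcp_recovery.c hunks drop the file-local tcp_rack_sent_after() in favour of a shared tcp_skb_sent_after() helper defined outside this diff; given the removed body above, the shared helper is assumed to perform the same comparison:

	/* Assumed shape of the shared helper (presumably in include/net/tcp.h). */
	static inline bool tcp_skb_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
	{
		/* Later transmit timestamp wins; on a tie, the higher end_seq wins. */
		return t1 > t2 || (t1 == t2 && after(seq1, seq2));
	}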
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 5842081bc8a2..862b96248a92 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -27,7 +27,7 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!acked)
return;
}
- tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
+ tcp_cong_avoid_ai(tp, min(tcp_snd_cwnd(tp), TCP_SCALABLE_AI_CNT),
acked);
}
@@ -35,7 +35,7 @@ static u32 tcp_scalable_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
+ return max(tcp_snd_cwnd(tp) - (tcp_snd_cwnd(tp)>>TCP_SCALABLE_MD_SCALE), 2U);
}
static struct tcp_congestion_ops tcp_scalable __read_mostly = {
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index c8003c8aad2c..786848ad37ea 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -159,7 +159,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
{
- return min(tp->snd_ssthresh, tp->snd_cwnd);
+ return min(tp->snd_ssthresh, tcp_snd_cwnd(tp));
}
static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
@@ -217,14 +217,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* This is:
* (actual rate in segments) * baseRTT
*/
- target_cwnd = (u64)tp->snd_cwnd * vegas->baseRTT;
+ target_cwnd = (u64)tcp_snd_cwnd(tp) * vegas->baseRTT;
do_div(target_cwnd, rtt);
/* Calculate the difference between the window we had,
* and the window we would like to have. This quantity
* is the "Diff" from the Arizona Vegas papers.
*/
- diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT;
+ diff = tcp_snd_cwnd(tp) * (rtt-vegas->baseRTT) / vegas->baseRTT;
if (diff > gamma && tcp_in_slow_start(tp)) {
/* Going too fast. Time to slow down
@@ -238,7 +238,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
* truncation robs us of full link
* utilization.
*/
- tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
+ tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp),
+ (u32)target_cwnd + 1));
tp->snd_ssthresh = tcp_vegas_ssthresh(tp);
} else if (tcp_in_slow_start(tp)) {
@@ -254,14 +255,14 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
/* The old window was too fast, so
* we slow down.
*/
- tp->snd_cwnd--;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - 1);
tp->snd_ssthresh
= tcp_vegas_ssthresh(tp);
} else if (diff < alpha) {
/* We don't have enough extra packets
* in the network, so speed up.
*/
- tp->snd_cwnd++;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
} else {
/* Sending just as fast as we
* should be.
@@ -269,10 +270,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
}
}
- if (tp->snd_cwnd < 2)
- tp->snd_cwnd = 2;
- else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
- tp->snd_cwnd = tp->snd_cwnd_clamp;
+ if (tcp_snd_cwnd(tp) < 2)
+ tcp_snd_cwnd_set(tp, 2);
+ else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp)
+ tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp);
tp->snd_ssthresh = tcp_current_ssthresh(sk);
}
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index cd50a61c9976..366ff6f214b2 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -146,11 +146,11 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
rtt = veno->minrtt;
- target_cwnd = (u64)tp->snd_cwnd * veno->basertt;
+ target_cwnd = (u64)tcp_snd_cwnd(tp) * veno->basertt;
target_cwnd <<= V_PARAM_SHIFT;
do_div(target_cwnd, rtt);
- veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd;
+ veno->diff = (tcp_snd_cwnd(tp) << V_PARAM_SHIFT) - target_cwnd;
if (tcp_in_slow_start(tp)) {
/* Slow start. */
@@ -164,15 +164,15 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
/* In the "non-congestive state", increase cwnd
* every rtt.
*/
- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
} else {
/* In the "congestive state", increase cwnd
* every other rtt.
*/
- if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+ if (tp->snd_cwnd_cnt >= tcp_snd_cwnd(tp)) {
if (veno->inc &&
- tp->snd_cwnd < tp->snd_cwnd_clamp) {
- tp->snd_cwnd++;
+ tcp_snd_cwnd(tp) < tp->snd_cwnd_clamp) {
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1);
veno->inc = 0;
} else
veno->inc = 1;
@@ -181,10 +181,10 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
tp->snd_cwnd_cnt += acked;
}
done:
- if (tp->snd_cwnd < 2)
- tp->snd_cwnd = 2;
- else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
- tp->snd_cwnd = tp->snd_cwnd_clamp;
+ if (tcp_snd_cwnd(tp) < 2)
+ tcp_snd_cwnd_set(tp, 2);
+ else if (tcp_snd_cwnd(tp) > tp->snd_cwnd_clamp)
+ tcp_snd_cwnd_set(tp, tp->snd_cwnd_clamp);
}
/* Wipe the slate clean for the next rtt. */
/* veno->cntrtt = 0; */
@@ -199,10 +199,10 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
if (veno->diff < beta)
/* in "non-congestive state", cut cwnd by 1/5 */
- return max(tp->snd_cwnd * 4 / 5, 2U);
+ return max(tcp_snd_cwnd(tp) * 4 / 5, 2U);
else
/* in "congestive state", cut cwnd by 1/2 */
- return max(tp->snd_cwnd >> 1U, 2U);
+ return max(tcp_snd_cwnd(tp) >> 1U, 2U);
}
static struct tcp_congestion_ops tcp_veno __read_mostly = {
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index b2e05c4cea00..c6e97141eef2 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -244,7 +244,8 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
switch (event) {
case CA_EVENT_COMPLETE_CWR:
- tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
+ tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
+ tcp_snd_cwnd_set(tp, tp->snd_ssthresh);
break;
case CA_EVENT_LOSS:
tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 07c4c93b9fdb..18b07ff5d20e 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -71,11 +71,11 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!yeah->doing_reno_now) {
/* Scalable */
- tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
+ tcp_cong_avoid_ai(tp, min(tcp_snd_cwnd(tp), TCP_SCALABLE_AI_CNT),
acked);
} else {
/* Reno */
- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+ tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked);
}
/* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt.
@@ -130,7 +130,7 @@ do_vegas:
/* Compute excess number of packets above bandwidth
* Avoid doing full 64 bit divide.
*/
- bw = tp->snd_cwnd;
+ bw = tcp_snd_cwnd(tp);
bw *= rtt - yeah->vegas.baseRTT;
do_div(bw, rtt);
queue = bw;
@@ -138,20 +138,20 @@ do_vegas:
if (queue > TCP_YEAH_ALPHA ||
rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) {
if (queue > TCP_YEAH_ALPHA &&
- tp->snd_cwnd > yeah->reno_count) {
+ tcp_snd_cwnd(tp) > yeah->reno_count) {
u32 reduction = min(queue / TCP_YEAH_GAMMA ,
- tp->snd_cwnd >> TCP_YEAH_EPSILON);
+ tcp_snd_cwnd(tp) >> TCP_YEAH_EPSILON);
- tp->snd_cwnd -= reduction;
+ tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) - reduction);
- tp->snd_cwnd = max(tp->snd_cwnd,
- yeah->reno_count);
+ tcp_snd_cwnd_set(tp, max(tcp_snd_cwnd(tp),
+ yeah->reno_count));
- tp->snd_ssthresh = tp->snd_cwnd;
+ tp->snd_ssthresh = tcp_snd_cwnd(tp);
}
if (yeah->reno_count <= 2)
- yeah->reno_count = max(tp->snd_cwnd>>1, 2U);
+ yeah->reno_count = max(tcp_snd_cwnd(tp)>>1, 2U);
else
yeah->reno_count++;
@@ -176,7 +176,7 @@ do_vegas:
*/
yeah->vegas.beg_snd_una = yeah->vegas.beg_snd_nxt;
yeah->vegas.beg_snd_nxt = tp->snd_nxt;
- yeah->vegas.beg_snd_cwnd = tp->snd_cwnd;
+ yeah->vegas.beg_snd_cwnd = tcp_snd_cwnd(tp);
/* Wipe the slate clean for the next RTT. */
yeah->vegas.cntRTT = 0;
@@ -193,16 +193,16 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
if (yeah->doing_reno_now < TCP_YEAH_RHO) {
reduction = yeah->lastQ;
- reduction = min(reduction, max(tp->snd_cwnd>>1, 2U));
+ reduction = min(reduction, max(tcp_snd_cwnd(tp)>>1, 2U));
- reduction = max(reduction, tp->snd_cwnd >> TCP_YEAH_DELTA);
+ reduction = max(reduction, tcp_snd_cwnd(tp) >> TCP_YEAH_DELTA);
} else
- reduction = max(tp->snd_cwnd>>1, 2U);
+ reduction = max(tcp_snd_cwnd(tp)>>1, 2U);
yeah->fast_count = 0;
yeah->reno_count = max(yeah->reno_count>>1, 2U);
- return max_t(int, tp->snd_cwnd - reduction, 2);
+ return max_t(int, tcp_snd_cwnd(tp) - reduction, 2);
}
static struct tcp_congestion_ops tcp_yeah __read_mostly = {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6b4d8361560f..aa9f2ec3dc46 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1726,7 +1726,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
EXPORT_SYMBOL(udp_ioctl);
struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
- int noblock, int *off, int *err)
+ int *off, int *err)
{
struct sk_buff_head *sk_queue = &sk->sk_receive_queue;
struct sk_buff_head *queue;
@@ -1735,7 +1735,6 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
int error;
queue = &udp_sk(sk)->reader_queue;
- flags |= noblock ? MSG_DONTWAIT : 0;
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do {
struct sk_buff *skb;
@@ -1805,7 +1804,7 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
struct sk_buff *skb;
int err, used;
- skb = skb_recv_udp(sk, 0, 1, &err);
+ skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
if (!skb)
return err;
@@ -1843,8 +1842,8 @@ EXPORT_SYMBOL(udp_read_sock);
* return it, otherwise we block.
*/
-int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len)
+int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
@@ -1859,7 +1858,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
try_again:
off = sk_peek_offset(sk, flags);
- skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
+ skb = __skb_recv_udp(sk, flags, &off, &err);
if (!skb)
return err;
@@ -1910,7 +1909,7 @@ try_again:
UDP_INC_STATS(sock_net(sk),
UDP_MIB_INDATAGRAMS, is_udplite);
- sock_recv_ts_and_drops(msg, sk, skb);
+ sock_recv_cmsgs(msg, sk, skb);
/* Copy the address. */
if (sin) {
@@ -2564,8 +2563,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
struct sock *sk;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
- if (INET_MATCH(sk, net, acookie, rmt_addr,
- loc_addr, ports, dif, sdif))
+ if (inet_match(net, sk, acookie, ports, dif, sdif))
return sk;
/* Only check first socket in chain */
break;
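[Note] With the separate noblock argument dropped, in-kernel consumers express non-blocking reads purely through MSG_DONTWAIT, as udp_read_sock() now does. A minimal caller sketch under that assumption (function name and error handling are illustrative only):

    /* Hypothetical kernel-side consumer draining a UDP socket without blocking. */
    static void example_drain_udp(struct sock *sk)
    {
    	struct sk_buff *skb;
    	int err;

    	while ((skb = skb_recv_udp(sk, MSG_DONTWAIT, &err)) != NULL) {
    		/* ... process the datagram ... */
    		kfree_skb(skb);	/* a real consumer would likely use skb_consume_udp() */
    	}
    	/* err is typically -EAGAIN once the queue is empty. */
    }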
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index bbe6569c9ad3..ff15918b7bdc 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -11,14 +11,13 @@
static struct proto *udpv6_prot_saved __read_mostly;
static int sk_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int noblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
- return udpv6_prot_saved->recvmsg(sk, msg, len, noblock, flags,
- addr_len);
+ return udpv6_prot_saved->recvmsg(sk, msg, len, flags, addr_len);
#endif
- return udp_prot.recvmsg(sk, msg, len, noblock, flags, addr_len);
+ return udp_prot.recvmsg(sk, msg, len, flags, addr_len);
}
static bool udp_sk_has_data(struct sock *sk)
@@ -61,7 +60,7 @@ static int udp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
}
static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct sk_psock *psock;
int copied, ret;
@@ -71,10 +70,10 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
psock = sk_psock_get(sk);
if (unlikely(!psock))
- return sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return sk_udp_recvmsg(sk, msg, len, flags, addr_len);
if (!psock_has_data(psock)) {
- ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ ret = sk_udp_recvmsg(sk, msg, len, flags, addr_len);
goto out;
}
@@ -84,12 +83,12 @@ msg_bytes_ready:
long timeo;
int data;
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
data = udp_msg_wait_data(sk, psock, timeo);
if (data) {
if (psock_has_data(psock))
goto msg_bytes_ready;
- ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ ret = sk_udp_recvmsg(sk, msg, len, flags, addr_len);
goto out;
}
copied = -EAGAIN;
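[Note] The udp_bpf_recvmsg() hunk above shows the general replacement for the removed nonblock parameter: the wait timeout is derived from MSG_DONTWAIT in flags. A sketch of that pattern in isolation, with illustrative names:

    /* Skeleton only: non-blocking behaviour now comes from flags alone. */
    static int example_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
    			       int flags, int *addr_len)
    {
    	/* 0 when MSG_DONTWAIT is set, otherwise the socket's SO_RCVTIMEO. */
    	long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

    	/* ... wait up to timeo for data, copy it into msg ... */
    	return -EAGAIN;	/* placeholder; a real implementation returns bytes copied */
    }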
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 2878d8285caf..4ba7a88a1b1d 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -17,8 +17,8 @@ int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
int udp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
-int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
- int flags, int *addr_len);
+int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+ int *addr_len);
int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
int flags);
void udp_destroy_sock(struct sock *sk);