summary | refs | log | tree | commit | diff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r-- net/ipv4/arp.c 9
-rw-r--r-- net/ipv4/esp4.c 2
-rw-r--r-- net/ipv4/fou_bpf.c 2
-rw-r--r-- net/ipv4/gre_demux.c 2
-rw-r--r-- net/ipv4/igmp.c 3
-rw-r--r-- net/ipv4/inet_fragment.c 4
-rw-r--r-- net/ipv4/inet_timewait_sock.c 16
-rw-r--r-- net/ipv4/ip_gre.c 144
-rw-r--r-- net/ipv4/ip_tunnel.c 111
-rw-r--r-- net/ipv4/ip_tunnel_core.c 82
-rw-r--r-- net/ipv4/ip_vti.c 41
-rw-r--r-- net/ipv4/ipip.c 33
-rw-r--r-- net/ipv4/ipmr.c 2
-rw-r--r-- net/ipv4/syncookies.c 3
-rw-r--r-- net/ipv4/tcp.c 23
-rw-r--r-- net/ipv4/tcp_input.c 59
-rw-r--r-- net/ipv4/tcp_ipv4.c 21
-rw-r--r-- net/ipv4/tcp_minisocks.c 10
-rw-r--r-- net/ipv4/tcp_offload.c 4
-rw-r--r-- net/ipv4/tcp_output.c 18
-rw-r--r-- net/ipv4/tcp_timer.c 4
-rw-r--r-- net/ipv4/udp.c 34
-rw-r--r-- net/ipv4/udp_tunnel_core.c 5
23 files changed, 384 insertions, 248 deletions
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 0d0d725b46ad..ab82ca104496 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -456,7 +456,8 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
/*unsigned long now; */
struct net *net = dev_net(dev);
- rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev));
+ rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev),
+ RT_SCOPE_UNIVERSE);
if (IS_ERR(rt))
return 1;
if (rt->dst.dev != dev) {
@@ -1056,7 +1057,8 @@ static int arp_req_set(struct net *net, struct arpreq *r,
if (r->arp_flags & ATF_PERM)
r->arp_flags |= ATF_COM;
if (!dev) {
- struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0);
+ struct rtable *rt = ip_route_output(net, ip, 0, 0, 0,
+ RT_SCOPE_LINK);
if (IS_ERR(rt))
return PTR_ERR(rt);
@@ -1188,7 +1190,8 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
if (!dev) {
- struct rtable *rt = ip_route_output(net, ip, 0, RTO_ONLINK, 0);
+ struct rtable *rt = ip_route_output(net, ip, 0, 0, 0,
+ RT_SCOPE_LINK);
if (IS_ERR(rt))
return PTR_ERR(rt);
dev = rt->dst.dev;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index d33d12421814..40330253f076 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -114,7 +114,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
*/
if (req->src != req->dst)
for (sg = sg_next(req->src); sg; sg = sg_next(sg))
- skb_page_unref(skb, sg_page(sg), false);
+ skb_page_unref(sg_page(sg), skb->pp_recycle);
}
#ifdef CONFIG_INET_ESPINTCP
diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c
index 06e5572f296f..54984f3170a8 100644
--- a/net/ipv4/fou_bpf.c
+++ b/net/ipv4/fou_bpf.c
@@ -64,7 +64,7 @@ __bpf_kfunc int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx,
info->encap.type = TUNNEL_ENCAP_NONE;
}
- if (info->key.tun_flags & TUNNEL_CSUM)
+ if (test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags))
info->encap.flags |= TUNNEL_ENCAP_FLAG_CSUM;
info->encap.sport = encap->sport;
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 3757fd93523f..6701a98d9a9f 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -73,7 +73,7 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
return -EINVAL;
- tpi->flags = gre_flags_to_tnl_flags(greh->flags);
+ gre_flags_to_tnl_flags(tpi->flags, greh->flags);
hdr_len = gre_calc_hlen(tpi->flags);
if (!pskb_may_pull(skb, nhs + hdr_len))
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 717e97a389a8..9bf09de6a2e7 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1842,7 +1842,8 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
if (!dev) {
struct rtable *rt = ip_route_output(net,
imr->imr_multiaddr.s_addr,
- 0, 0, 0);
+ 0, 0, 0,
+ RT_SCOPE_UNIVERSE);
if (!IS_ERR(rt)) {
dev = rt->dst.dev;
ip_rt_put(rt);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index c88c9034d630..faaec92a46ac 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -175,7 +175,7 @@ static void fqdir_free_fn(struct work_struct *work)
}
}
-static DECLARE_WORK(fqdir_free_work, fqdir_free_fn);
+static DECLARE_DELAYED_WORK(fqdir_free_work, fqdir_free_fn);
static void fqdir_work_fn(struct work_struct *work)
{
@@ -184,7 +184,7 @@ static void fqdir_work_fn(struct work_struct *work)
rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL);
if (llist_add(&fqdir->free_list, &fqdir_free_list))
- queue_work(system_wq, &fqdir_free_work);
+ queue_delayed_work(system_wq, &fqdir_free_work, HZ);
}
int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net)
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index e8de45d34d56..e28075f0006e 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -264,14 +264,18 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
/* Remove all non full sockets (TIME_WAIT and NEW_SYN_RECV) for dead netns */
-void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
+void inet_twsk_purge(struct inet_hashinfo *hashinfo)
{
+ struct inet_ehash_bucket *head = &hashinfo->ehash[0];
+ unsigned int ehash_mask = hashinfo->ehash_mask;
struct hlist_nulls_node *node;
unsigned int slot;
struct sock *sk;
- for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
- struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
+ for (slot = 0; slot <= ehash_mask; slot++, head++) {
+ if (hlist_nulls_empty(&head->chain))
+ continue;
+
restart_rcu:
cond_resched();
rcu_read_lock();
@@ -283,15 +287,13 @@ restart:
TCPF_NEW_SYN_RECV))
continue;
- if (sk->sk_family != family ||
- refcount_read(&sock_net(sk)->ns.count))
+ if (refcount_read(&sock_net(sk)->ns.count))
continue;
if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
continue;
- if (unlikely(sk->sk_family != family ||
- refcount_read(&sock_net(sk)->ns.count))) {
+ if (refcount_read(&sock_net(sk)->ns.count)) {
sock_gen_put(sk);
goto restart;
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 57ddcd8c62f6..c3af965dc407 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -265,6 +265,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
struct net *net = dev_net(skb->dev);
struct metadata_dst *tun_dst = NULL;
struct erspan_base_hdr *ershdr;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
struct ip_tunnel_net *itn;
struct ip_tunnel *tunnel;
const struct iphdr *iph;
@@ -272,12 +273,14 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
int ver;
int len;
+ ip_tunnel_flags_copy(flags, tpi->flags);
+
itn = net_generic(net, erspan_net_id);
iph = ip_hdr(skb);
if (is_erspan_type1(gre_hdr_len)) {
ver = 0;
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
- tpi->flags | TUNNEL_NO_KEY,
+ __set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
iph->saddr, iph->daddr, 0);
} else {
if (unlikely(!pskb_may_pull(skb,
@@ -287,8 +290,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
ver = ershdr->ver;
iph = ip_hdr(skb);
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
- tpi->flags | TUNNEL_KEY,
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
iph->saddr, iph->daddr, tpi->key);
}
@@ -312,10 +315,9 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
struct ip_tunnel_info *info;
unsigned char *gh;
__be64 tun_id;
- __be16 flags;
- tpi->flags |= TUNNEL_KEY;
- flags = tpi->flags;
+ __set_bit(IP_TUNNEL_KEY_BIT, tpi->flags);
+ ip_tunnel_flags_copy(flags, tpi->flags);
tun_id = key32_to_tunnel_id(tpi->key);
tun_dst = ip_tun_rx_dst(skb, flags,
@@ -338,7 +340,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
ERSPAN_V2_MDSIZE);
info = &tun_dst->u.tun_info;
- info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ info->key.tun_flags);
info->options_len = sizeof(*md);
}
@@ -381,10 +384,13 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
tnl_params = &tunnel->parms.iph;
if (tunnel->collect_md || tnl_params->daddr == 0) {
- __be16 flags;
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
__be64 tun_id;
- flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ __set_bit(IP_TUNNEL_CSUM_BIT, flags);
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ ip_tunnel_flags_and(flags, tpi->flags, flags);
+
tun_id = key32_to_tunnel_id(tpi->key);
tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
if (!tun_dst)
@@ -464,12 +470,15 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- __be16 flags = tunnel->parms.o_flags;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
+
+ ip_tunnel_flags_copy(flags, tunnel->parms.o_flags);
/* Push GRE header. */
gre_build_header(skb, tunnel->tun_hlen,
flags, proto, tunnel->parms.o_key,
- (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
+ test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
@@ -483,10 +492,10 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
int tunnel_hlen;
- __be16 flags;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
@@ -500,14 +509,19 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
goto err_free_skb;
/* Push Tunnel header. */
- if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
+ if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ tun_info->key.tun_flags)))
goto err_free_skb;
- flags = tun_info->key.tun_flags &
- (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
+ __set_bit(IP_TUNNEL_CSUM_BIT, flags);
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ __set_bit(IP_TUNNEL_SEQ_BIT, flags);
+ ip_tunnel_flags_and(flags, tun_info->key.tun_flags, flags);
+
gre_build_header(skb, tunnel_hlen, flags, proto,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
+ test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
@@ -521,6 +535,7 @@ err_free_skb:
static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct erspan_metadata *md;
@@ -536,7 +551,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
key = &tun_info->key;
- if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+ if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags))
goto err_free_skb;
if (tun_info->options_len < sizeof(*md))
goto err_free_skb;
@@ -589,8 +604,9 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
}
- gre_build_header(skb, 8, TUNNEL_SEQ,
- proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
+ __set_bit(IP_TUNNEL_SEQ_BIT, flags);
+ gre_build_header(skb, 8, flags, proto, 0,
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)));
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
@@ -664,7 +680,8 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
tnl_params = &tunnel->parms.iph;
}
- if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
+ if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ tunnel->parms.o_flags)))
goto free_skb;
__gre_xmit(skb, dev, tnl_params, skb->protocol);
@@ -706,7 +723,7 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
/* Push ERSPAN header */
if (tunnel->erspan_ver == 0) {
proto = htons(ETH_P_ERSPAN);
- tunnel->parms.o_flags &= ~TUNNEL_SEQ;
+ __clear_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags);
} else if (tunnel->erspan_ver == 1) {
erspan_build_header(skb, ntohl(tunnel->parms.o_key),
tunnel->index,
@@ -721,7 +738,7 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
goto free_skb;
}
- tunnel->parms.o_flags &= ~TUNNEL_KEY;
+ __clear_bit(IP_TUNNEL_KEY_BIT, tunnel->parms.o_flags);
__gre_xmit(skb, dev, &tunnel->parms.iph, proto);
return NETDEV_TX_OK;
@@ -744,7 +761,8 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
return NETDEV_TX_OK;
}
- if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
+ if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ tunnel->parms.o_flags)))
goto free_skb;
if (skb_cow_head(skb, dev->needed_headroom))
@@ -762,7 +780,6 @@ free_skb:
static void ipgre_link_update(struct net_device *dev, bool set_mtu)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- __be16 flags;
int len;
len = tunnel->tun_hlen;
@@ -778,10 +795,9 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
if (set_mtu)
dev->mtu = max_t(int, dev->mtu - len, 68);
- flags = tunnel->parms.o_flags;
-
- if (flags & TUNNEL_SEQ ||
- (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
+ if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags) ||
+ (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
+ tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
dev->features &= ~NETIF_F_GSO_SOFTWARE;
dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
} else {
@@ -790,20 +806,29 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
}
}
-static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
+static int ipgre_tunnel_ctl(struct net_device *dev,
+ struct ip_tunnel_parm_kern *p,
int cmd)
{
+ __be16 i_flags, o_flags;
int err;
+ if (!ip_tunnel_flags_is_be16_compat(p->i_flags) ||
+ !ip_tunnel_flags_is_be16_compat(p->o_flags))
+ return -EOVERFLOW;
+
+ i_flags = ip_tunnel_flags_to_be16(p->i_flags);
+ o_flags = ip_tunnel_flags_to_be16(p->o_flags);
+
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
- ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING)))
+ ((i_flags | o_flags) & (GRE_VERSION | GRE_ROUTING)))
return -EINVAL;
}
- p->i_flags = gre_flags_to_tnl_flags(p->i_flags);
- p->o_flags = gre_flags_to_tnl_flags(p->o_flags);
+ gre_flags_to_tnl_flags(p->i_flags, i_flags);
+ gre_flags_to_tnl_flags(p->o_flags, o_flags);
err = ip_tunnel_ctl(dev, p, cmd);
if (err)
@@ -812,15 +837,18 @@ static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
if (cmd == SIOCCHGTUNNEL) {
struct ip_tunnel *t = netdev_priv(dev);
- t->parms.i_flags = p->i_flags;
- t->parms.o_flags = p->o_flags;
+ ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags);
+ ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags);
if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
ipgre_link_update(dev, true);
}
- p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
- p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
+ i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
+ ip_tunnel_flags_from_be16(p->i_flags, i_flags);
+ o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
+ ip_tunnel_flags_from_be16(p->o_flags, o_flags);
+
return 0;
}
@@ -960,7 +988,6 @@ static void ipgre_tunnel_setup(struct net_device *dev)
static void __gre_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel;
- __be16 flags;
tunnel = netdev_priv(dev);
tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
@@ -972,14 +999,13 @@ static void __gre_tunnel_init(struct net_device *dev)
dev->features |= GRE_FEATURES | NETIF_F_LLTX;
dev->hw_features |= GRE_FEATURES;
- flags = tunnel->parms.o_flags;
-
/* TCP offload with GRE SEQ is not supported, nor can we support 2
* levels of outer headers requiring an update.
*/
- if (flags & TUNNEL_SEQ)
+ if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags))
return;
- if (flags & TUNNEL_CSUM && tunnel->encap.type != TUNNEL_ENCAP_NONE)
+ if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
+ tunnel->encap.type != TUNNEL_ENCAP_NONE)
return;
dev->features |= NETIF_F_GSO_SOFTWARE;
@@ -1136,7 +1162,7 @@ static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
static int ipgre_netlink_parms(struct net_device *dev,
struct nlattr *data[],
struct nlattr *tb[],
- struct ip_tunnel_parm *parms,
+ struct ip_tunnel_parm_kern *parms,
__u32 *fwmark)
{
struct ip_tunnel *t = netdev_priv(dev);
@@ -1152,10 +1178,12 @@ static int ipgre_netlink_parms(struct net_device *dev,
parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
if (data[IFLA_GRE_IFLAGS])
- parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
+ gre_flags_to_tnl_flags(parms->i_flags,
+ nla_get_be16(data[IFLA_GRE_IFLAGS]));
if (data[IFLA_GRE_OFLAGS])
- parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
+ gre_flags_to_tnl_flags(parms->o_flags,
+ nla_get_be16(data[IFLA_GRE_OFLAGS]));
if (data[IFLA_GRE_IKEY])
parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
@@ -1203,7 +1231,7 @@ static int ipgre_netlink_parms(struct net_device *dev,
static int erspan_netlink_parms(struct net_device *dev,
struct nlattr *data[],
struct nlattr *tb[],
- struct ip_tunnel_parm *parms,
+ struct ip_tunnel_parm_kern *parms,
__u32 *fwmark)
{
struct ip_tunnel *t = netdev_priv(dev);
@@ -1362,7 +1390,7 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- struct ip_tunnel_parm p;
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = 0;
int err;
@@ -1380,7 +1408,7 @@ static int erspan_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- struct ip_tunnel_parm p;
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = 0;
int err;
@@ -1399,8 +1427,8 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = t->fwmark;
- struct ip_tunnel_parm p;
int err;
err = ipgre_newlink_encap_setup(dev, data);
@@ -1415,8 +1443,8 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
if (err < 0)
return err;
- t->parms.i_flags = p.i_flags;
- t->parms.o_flags = p.o_flags;
+ ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
+ ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
ipgre_link_update(dev, !tb[IFLA_MTU]);
@@ -1428,8 +1456,8 @@ static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = t->fwmark;
- struct ip_tunnel_parm p;
int err;
err = ipgre_newlink_encap_setup(dev, data);
@@ -1444,8 +1472,8 @@ static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
if (err < 0)
return err;
- t->parms.i_flags = p.i_flags;
- t->parms.o_flags = p.o_flags;
+ ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
+ ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
return 0;
}
@@ -1501,8 +1529,10 @@ static size_t ipgre_get_size(const struct net_device *dev)
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm *p = &t->parms;
- __be16 o_flags = p->o_flags;
+ struct ip_tunnel_parm_kern *p = &t->parms;
+ IP_TUNNEL_DECLARE_FLAGS(o_flags);
+
+ ip_tunnel_flags_copy(o_flags, p->o_flags);
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
nla_put_be16(skb, IFLA_GRE_IFLAGS,
@@ -1550,7 +1580,7 @@ static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (t->erspan_ver <= 2) {
if (t->erspan_ver != 0 && !t->collect_md)
- t->parms.o_flags |= TUNNEL_KEY;
+ __set_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags);
if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
goto nla_put_failure;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 1b8d8ff9a237..177f40c3a8e8 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -56,17 +56,13 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
IP_TNL_HASH_BITS);
}
-static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
- __be16 flags, __be32 key)
+static bool ip_tunnel_key_match(const struct ip_tunnel_parm_kern *p,
+ const unsigned long *flags, __be32 key)
{
- if (p->i_flags & TUNNEL_KEY) {
- if (flags & TUNNEL_KEY)
- return key == p->i_key;
- else
- /* key expected, none present */
- return false;
- } else
- return !(flags & TUNNEL_KEY);
+ if (!test_bit(IP_TUNNEL_KEY_BIT, flags))
+ return !test_bit(IP_TUNNEL_KEY_BIT, p->i_flags);
+
+ return test_bit(IP_TUNNEL_KEY_BIT, p->i_flags) && p->i_key == key;
}
/* Fallback tunnel: no source, no destination, no key, no options
@@ -81,7 +77,7 @@ static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
Given src, dst and key, find appropriate for input tunnel.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
- int link, __be16 flags,
+ int link, const unsigned long *flags,
__be32 remote, __be32 local,
__be32 key)
{
@@ -143,7 +139,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
}
hlist_for_each_entry_rcu(t, head, hash_node) {
- if ((!(flags & TUNNEL_NO_KEY) && t->parms.i_key != key) ||
+ if ((!test_bit(IP_TUNNEL_NO_KEY_BIT, flags) &&
+ t->parms.i_key != key) ||
t->parms.iph.saddr != 0 ||
t->parms.iph.daddr != 0 ||
!(t->dev->flags & IFF_UP))
@@ -171,7 +168,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm_kern *parms)
{
unsigned int h;
__be32 remote;
@@ -182,7 +179,8 @@ static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
else
remote = 0;
- if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
+ if (!test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags) &&
+ test_bit(IP_TUNNEL_VTI_BIT, parms->i_flags))
i_key = 0;
h = ip_tunnel_hash(i_key, remote);
@@ -206,17 +204,19 @@ static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
}
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
- struct ip_tunnel_parm *parms,
+ struct ip_tunnel_parm_kern *parms,
int type)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
__be32 key = parms->i_key;
- __be16 flags = parms->i_flags;
int link = parms->link;
struct ip_tunnel *t = NULL;
struct hlist_head *head = ip_bucket(itn, parms);
+ ip_tunnel_flags_copy(flags, parms->i_flags);
+
hlist_for_each_entry_rcu(t, head, hash_node) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
@@ -230,7 +230,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
static struct net_device *__ip_tunnel_create(struct net *net,
const struct rtnl_link_ops *ops,
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm_kern *parms)
{
int err;
struct ip_tunnel *tunnel;
@@ -326,7 +326,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
static struct ip_tunnel *ip_tunnel_create(struct net *net,
struct ip_tunnel_net *itn,
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm_kern *parms)
{
struct ip_tunnel *nt;
struct net_device *dev;
@@ -386,15 +386,15 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
}
#endif
- if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
- ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
+ if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) !=
+ test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) {
DEV_STATS_INC(tunnel->dev, rx_crc_errors);
DEV_STATS_INC(tunnel->dev, rx_errors);
goto drop;
}
- if (tunnel->parms.i_flags&TUNNEL_SEQ) {
- if (!(tpi->flags&TUNNEL_SEQ) ||
+ if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) {
+ if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) ||
(tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
DEV_STATS_INC(tunnel->dev, rx_errors);
@@ -638,7 +638,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
- if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
+ if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags))
df = htons(IP_DF);
if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
key->u.ipv4.dst, true)) {
@@ -871,7 +871,7 @@ EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
static void ip_tunnel_update(struct ip_tunnel_net *itn,
struct ip_tunnel *t,
struct net_device *dev,
- struct ip_tunnel_parm *p,
+ struct ip_tunnel_parm_kern *p,
bool set_mtu,
__u32 fwmark)
{
@@ -903,7 +903,8 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
netdev_state_change(dev);
}
-int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p,
+ int cmd)
{
int err = 0;
struct ip_tunnel *t = netdev_priv(dev);
@@ -927,10 +928,10 @@ int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
goto done;
if (p->iph.ttl)
p->iph.frag_off |= htons(IP_DF);
- if (!(p->i_flags & VTI_ISVTI)) {
- if (!(p->i_flags & TUNNEL_KEY))
+ if (!test_bit(IP_TUNNEL_VTI_BIT, p->i_flags)) {
+ if (!test_bit(IP_TUNNEL_KEY_BIT, p->i_flags))
p->i_key = 0;
- if (!(p->o_flags & TUNNEL_KEY))
+ if (!test_bit(IP_TUNNEL_KEY_BIT, p->o_flags))
p->o_key = 0;
}
@@ -1005,16 +1006,58 @@ done:
}
EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
+bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp,
+ const void __user *data)
+{
+ struct ip_tunnel_parm p;
+
+ if (copy_from_user(&p, data, sizeof(p)))
+ return false;
+
+ strscpy(kp->name, p.name);
+ kp->link = p.link;
+ ip_tunnel_flags_from_be16(kp->i_flags, p.i_flags);
+ ip_tunnel_flags_from_be16(kp->o_flags, p.o_flags);
+ kp->i_key = p.i_key;
+ kp->o_key = p.o_key;
+ memcpy(&kp->iph, &p.iph, min(sizeof(kp->iph), sizeof(p.iph)));
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_parm_from_user);
+
+bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp)
+{
+ struct ip_tunnel_parm p;
+
+ if (!ip_tunnel_flags_is_be16_compat(kp->i_flags) ||
+ !ip_tunnel_flags_is_be16_compat(kp->o_flags))
+ return false;
+
+ memset(&p, 0, sizeof(p));
+
+ strscpy(p.name, kp->name);
+ p.link = kp->link;
+ p.i_flags = ip_tunnel_flags_to_be16(kp->i_flags);
+ p.o_flags = ip_tunnel_flags_to_be16(kp->o_flags);
+ p.i_key = kp->i_key;
+ p.o_key = kp->o_key;
+ memcpy(&p.iph, &kp->iph, min(sizeof(p.iph), sizeof(kp->iph)));
+
+ return !copy_to_user(data, &p, sizeof(p));
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_parm_to_user);
+
int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
void __user *data, int cmd)
{
- struct ip_tunnel_parm p;
+ struct ip_tunnel_parm_kern p;
int err;
- if (copy_from_user(&p, data, sizeof(p)))
+ if (!ip_tunnel_parm_from_user(&p, data))
return -EFAULT;
err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
- if (!err && copy_to_user(data, &p, sizeof(p)))
+ if (!err && !ip_tunnel_parm_to_user(data, &p))
return -EFAULT;
return err;
}
@@ -1093,7 +1136,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct rtnl_link_ops *ops, char *devname)
{
struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
- struct ip_tunnel_parm parms;
+ struct ip_tunnel_parm_kern parms;
unsigned int i;
itn->rtnl_link_ops = ops;
@@ -1171,7 +1214,7 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm *p, __u32 fwmark)
+ struct ip_tunnel_parm_kern *p, __u32 fwmark)
{
struct ip_tunnel *nt;
struct net *net = dev_net(dev);
@@ -1225,7 +1268,7 @@ err_register_netdevice:
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm *p, __u32 fwmark)
+ struct ip_tunnel_parm_kern *p, __u32 fwmark)
{
struct ip_tunnel *t;
struct ip_tunnel *tunnel = netdev_priv(dev);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 80ccd6661aa3..a3676155be78 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -125,6 +125,7 @@ EXPORT_SYMBOL_GPL(__iptunnel_pull_header);
struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
gfp_t flags)
{
+ IP_TUNNEL_DECLARE_FLAGS(tun_flags) = { };
struct metadata_dst *res;
struct ip_tunnel_info *dst, *src;
@@ -144,10 +145,10 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
sizeof(struct in6_addr));
else
dst->key.u.ipv4.dst = src->key.u.ipv4.src;
- dst->key.tun_flags = src->key.tun_flags;
+ ip_tunnel_flags_copy(dst->key.tun_flags, src->key.tun_flags);
dst->mode = src->mode | IP_TUNNEL_INFO_TX;
ip_tunnel_info_opts_set(dst, ip_tunnel_info_opts(src),
- src->options_len, 0);
+ src->options_len, tun_flags);
return res;
}
@@ -497,7 +498,7 @@ static int ip_tun_parse_opts_geneve(struct nlattr *attr,
opt->opt_class = nla_get_be16(attr);
attr = tb[LWTUNNEL_IP_OPT_GENEVE_TYPE];
opt->type = nla_get_u8(attr);
- info->key.tun_flags |= TUNNEL_GENEVE_OPT;
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags);
}
return sizeof(struct geneve_opt) + data_len;
@@ -525,7 +526,7 @@ static int ip_tun_parse_opts_vxlan(struct nlattr *attr,
attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP];
md->gbp = nla_get_u32(attr);
md->gbp &= VXLAN_GBP_MASK;
- info->key.tun_flags |= TUNNEL_VXLAN_OPT;
+ __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags);
}
return sizeof(struct vxlan_metadata);
@@ -574,7 +575,7 @@ static int ip_tun_parse_opts_erspan(struct nlattr *attr,
set_hwid(&md->u.md2, nla_get_u8(attr));
}
- info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags);
}
return sizeof(struct erspan_metadata);
@@ -585,7 +586,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
{
int err, rem, opt_len, opts_len = 0;
struct nlattr *nla;
- __be16 type = 0;
+ u32 type = 0;
if (!attr)
return 0;
@@ -598,7 +599,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
nla_for_each_attr(nla, nla_data(attr), nla_len(attr), rem) {
switch (nla_type(nla)) {
case LWTUNNEL_IP_OPTS_GENEVE:
- if (type && type != TUNNEL_GENEVE_OPT)
+ if (type && type != IP_TUNNEL_GENEVE_OPT_BIT)
return -EINVAL;
opt_len = ip_tun_parse_opts_geneve(nla, info, opts_len,
extack);
@@ -607,7 +608,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
opts_len += opt_len;
if (opts_len > IP_TUNNEL_OPTS_MAX)
return -EINVAL;
- type = TUNNEL_GENEVE_OPT;
+ type = IP_TUNNEL_GENEVE_OPT_BIT;
break;
case LWTUNNEL_IP_OPTS_VXLAN:
if (type)
@@ -617,7 +618,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
if (opt_len < 0)
return opt_len;
opts_len += opt_len;
- type = TUNNEL_VXLAN_OPT;
+ type = IP_TUNNEL_VXLAN_OPT_BIT;
break;
case LWTUNNEL_IP_OPTS_ERSPAN:
if (type)
@@ -627,7 +628,7 @@ static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
if (opt_len < 0)
return opt_len;
opts_len += opt_len;
- type = TUNNEL_ERSPAN_OPT;
+ type = IP_TUNNEL_ERSPAN_OPT_BIT;
break;
default:
return -EINVAL;
@@ -705,10 +706,16 @@ static int ip_tun_build_state(struct net *net, struct nlattr *attr,
if (tb[LWTUNNEL_IP_TOS])
tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP_TOS]);
- if (tb[LWTUNNEL_IP_FLAGS])
- tun_info->key.tun_flags |=
- (nla_get_be16(tb[LWTUNNEL_IP_FLAGS]) &
- ~TUNNEL_OPTIONS_PRESENT);
+ if (tb[LWTUNNEL_IP_FLAGS]) {
+ IP_TUNNEL_DECLARE_FLAGS(flags);
+
+ ip_tunnel_flags_from_be16(flags,
+ nla_get_be16(tb[LWTUNNEL_IP_FLAGS]));
+ ip_tunnel_clear_options_present(flags);
+
+ ip_tunnel_flags_or(tun_info->key.tun_flags,
+ tun_info->key.tun_flags, flags);
+ }
tun_info->mode = IP_TUNNEL_INFO_TX;
tun_info->options_len = opt_len;
@@ -812,18 +819,18 @@ static int ip_tun_fill_encap_opts(struct sk_buff *skb, int type,
struct nlattr *nest;
int err = 0;
- if (!(tun_info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))
+ if (!ip_tunnel_is_options_present(tun_info->key.tun_flags))
return 0;
nest = nla_nest_start_noflag(skb, type);
if (!nest)
return -ENOMEM;
- if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT)
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_info->key.tun_flags))
err = ip_tun_fill_encap_opts_geneve(skb, tun_info);
- else if (tun_info->key.tun_flags & TUNNEL_VXLAN_OPT)
+ else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_info->key.tun_flags))
err = ip_tun_fill_encap_opts_vxlan(skb, tun_info);
- else if (tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)
+ else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags))
err = ip_tun_fill_encap_opts_erspan(skb, tun_info);
if (err) {
@@ -846,7 +853,8 @@ static int ip_tun_fill_encap_info(struct sk_buff *skb,
nla_put_in_addr(skb, LWTUNNEL_IP_SRC, tun_info->key.u.ipv4.src) ||
nla_put_u8(skb, LWTUNNEL_IP_TOS, tun_info->key.tos) ||
nla_put_u8(skb, LWTUNNEL_IP_TTL, tun_info->key.ttl) ||
- nla_put_be16(skb, LWTUNNEL_IP_FLAGS, tun_info->key.tun_flags) ||
+ nla_put_be16(skb, LWTUNNEL_IP_FLAGS,
+ ip_tunnel_flags_to_be16(tun_info->key.tun_flags)) ||
ip_tun_fill_encap_opts(skb, LWTUNNEL_IP_OPTS, tun_info))
return -ENOMEM;
@@ -857,11 +865,11 @@ static int ip_tun_opts_nlsize(struct ip_tunnel_info *info)
{
int opt_len;
- if (!(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))
+ if (!ip_tunnel_is_options_present(info->key.tun_flags))
return 0;
opt_len = nla_total_size(0); /* LWTUNNEL_IP_OPTS */
- if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) {
struct geneve_opt *opt;
int offset = 0;
@@ -874,10 +882,10 @@ static int ip_tun_opts_nlsize(struct ip_tunnel_info *info)
/* OPT_GENEVE_DATA */
offset += sizeof(*opt) + opt->length * 4;
}
- } else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
+ } else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags)) {
opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_VXLAN */
+ nla_total_size(4); /* OPT_VXLAN_GBP */
- } else if (info->key.tun_flags & TUNNEL_ERSPAN_OPT) {
+ } else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags)) {
struct erspan_metadata *md = ip_tunnel_info_opts(info);
opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_ERSPAN */
@@ -984,10 +992,17 @@ static int ip6_tun_build_state(struct net *net, struct nlattr *attr,
if (tb[LWTUNNEL_IP6_TC])
tun_info->key.tos = nla_get_u8(tb[LWTUNNEL_IP6_TC]);
- if (tb[LWTUNNEL_IP6_FLAGS])
- tun_info->key.tun_flags |=
- (nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]) &
- ~TUNNEL_OPTIONS_PRESENT);
+ if (tb[LWTUNNEL_IP6_FLAGS]) {
+ IP_TUNNEL_DECLARE_FLAGS(flags);
+ __be16 data;
+
+ data = nla_get_be16(tb[LWTUNNEL_IP6_FLAGS]);
+ ip_tunnel_flags_from_be16(flags, data);
+ ip_tunnel_clear_options_present(flags);
+
+ ip_tunnel_flags_or(tun_info->key.tun_flags,
+ tun_info->key.tun_flags, flags);
+ }
tun_info->mode = IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6;
tun_info->options_len = opt_len;
@@ -1008,7 +1023,8 @@ static int ip6_tun_fill_encap_info(struct sk_buff *skb,
nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) ||
nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) ||
nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) ||
- nla_put_be16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags) ||
+ nla_put_be16(skb, LWTUNNEL_IP6_FLAGS,
+ ip_tunnel_flags_to_be16(tun_info->key.tun_flags)) ||
ip_tun_fill_encap_opts(skb, LWTUNNEL_IP6_OPTS, tun_info))
return -ENOMEM;
@@ -1116,7 +1132,7 @@ bool ip_tunnel_netlink_encap_parms(struct nlattr *data[],
EXPORT_SYMBOL_GPL(ip_tunnel_netlink_encap_parms);
void ip_tunnel_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm_kern *parms)
{
if (data[IFLA_IPTUN_LINK])
parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
@@ -1139,8 +1155,12 @@ void ip_tunnel_netlink_parms(struct nlattr *data[],
if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
parms->iph.frag_off = htons(IP_DF);
- if (data[IFLA_IPTUN_FLAGS])
- parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
+ if (data[IFLA_IPTUN_FLAGS]) {
+ __be16 flags;
+
+ flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]);
+ ip_tunnel_flags_from_be16(parms->i_flags, flags);
+ }
if (data[IFLA_IPTUN_PROTO])
parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index ee587adb169f..14536da9f5dc 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -51,8 +51,11 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
const struct iphdr *iph = ip_hdr(skb);
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ __set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
+
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
iph->saddr, iph->daddr, 0);
if (tunnel) {
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
@@ -167,7 +170,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
struct flowi *fl)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct ip_tunnel_parm *parms = &tunnel->parms;
+ struct ip_tunnel_parm_kern *parms = &tunnel->parms;
struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev; /* Device to other host */
int pkt_len = skb->len;
@@ -322,8 +325,11 @@ static int vti4_err(struct sk_buff *skb, u32 info)
const struct iphdr *iph = (const struct iphdr *)skb->data;
int protocol = iph->protocol;
struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
+
+ __set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
iph->daddr, iph->saddr, 0);
if (!tunnel)
return -1;
@@ -373,8 +379,9 @@ static int vti4_err(struct sk_buff *skb, u32 info)
}
static int
-vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd)
{
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
int err = 0;
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
@@ -383,20 +390,26 @@ vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
return -EINVAL;
}
- if (!(p->i_flags & GRE_KEY))
+ if (!ip_tunnel_flags_is_be16_compat(p->i_flags) ||
+ !ip_tunnel_flags_is_be16_compat(p->o_flags))
+ return -EOVERFLOW;
+
+ if (!(ip_tunnel_flags_to_be16(p->i_flags) & GRE_KEY))
p->i_key = 0;
- if (!(p->o_flags & GRE_KEY))
+ if (!(ip_tunnel_flags_to_be16(p->o_flags) & GRE_KEY))
p->o_key = 0;
- p->i_flags = VTI_ISVTI;
+ __set_bit(IP_TUNNEL_VTI_BIT, flags);
+ ip_tunnel_flags_copy(p->i_flags, flags);
err = ip_tunnel_ctl(dev, p, cmd);
if (err)
return err;
if (cmd != SIOCDELTUNNEL) {
- p->i_flags |= GRE_KEY;
- p->o_flags |= GRE_KEY;
+ ip_tunnel_flags_from_be16(flags, GRE_KEY);
+ ip_tunnel_flags_or(p->i_flags, p->i_flags, flags);
+ ip_tunnel_flags_or(p->o_flags, p->o_flags, flags);
}
return 0;
}
@@ -531,7 +544,7 @@ static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
}
static void vti_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms,
+ struct ip_tunnel_parm_kern *parms,
__u32 *fwmark)
{
memset(parms, 0, sizeof(*parms));
@@ -541,7 +554,7 @@ static void vti_netlink_parms(struct nlattr *data[],
if (!data)
return;
- parms->i_flags = VTI_ISVTI;
+ __set_bit(IP_TUNNEL_VTI_BIT, parms->i_flags);
if (data[IFLA_VTI_LINK])
parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
@@ -566,7 +579,7 @@ static int vti_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- struct ip_tunnel_parm parms;
+ struct ip_tunnel_parm_kern parms;
__u32 fwmark = 0;
vti_netlink_parms(data, &parms, &fwmark);
@@ -578,8 +591,8 @@ static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = t->fwmark;
- struct ip_tunnel_parm p;
vti_netlink_parms(data, &p, &fwmark);
return ip_tunnel_changelink(dev, tb, &p, fwmark);
@@ -606,7 +619,7 @@ static size_t vti_get_size(const struct net_device *dev)
static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm *p = &t->parms;
+ struct ip_tunnel_parm_kern *p = &t->parms;
if (nla_put_u32(skb, IFLA_VTI_LINK, p->link) ||
nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key) ||
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index f2696eaadbe6..923a2ef68c2f 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -130,13 +130,16 @@ static int ipip_err(struct sk_buff *skb, u32 info)
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
const struct iphdr *iph = (const struct iphdr *)skb->data;
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
int err = 0;
- t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
- iph->daddr, iph->saddr, 0);
+ __set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
+
+ t = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->daddr,
+ iph->saddr, 0);
if (!t) {
err = -ENOENT;
goto out;
@@ -213,13 +216,16 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
{
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
struct metadata_dst *tun_dst = NULL;
struct ip_tunnel *tunnel;
const struct iphdr *iph;
+ __set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
+
iph = ip_hdr(skb);
- tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
- iph->saddr, iph->daddr, 0);
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags, iph->saddr,
+ iph->daddr, 0);
if (tunnel) {
const struct tnl_ptk_info *tpi;
@@ -238,7 +244,9 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
if (tunnel->collect_md) {
- tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
+ ip_tunnel_flags_zero(flags);
+
+ tun_dst = ip_tun_rx_dst(skb, flags, 0, 0);
if (!tun_dst)
return 0;
ip_tunnel_md_udp_encap(skb, &tun_dst->u.tun_info);
@@ -330,7 +338,7 @@ static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
}
static int
-ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd)
{
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
if (p->iph.version != 4 ||
@@ -340,7 +348,8 @@ ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
}
p->i_key = p->o_key = 0;
- p->i_flags = p->o_flags = 0;
+ ip_tunnel_flags_zero(p->i_flags);
+ ip_tunnel_flags_zero(p->o_flags);
return ip_tunnel_ctl(dev, p, cmd);
}
@@ -405,8 +414,8 @@ static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
}
static void ipip_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms, bool *collect_md,
- __u32 *fwmark)
+ struct ip_tunnel_parm_kern *parms,
+ bool *collect_md, __u32 *fwmark)
{
memset(parms, 0, sizeof(*parms));
@@ -432,8 +441,8 @@ static int ipip_newlink(struct net *src_net, struct net_device *dev,
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ struct ip_tunnel_parm_kern p;
__u32 fwmark = 0;
if (ip_tunnel_netlink_encap_parms(data, &ipencap)) {
@@ -452,8 +461,8 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ struct ip_tunnel_parm_kern p;
bool collect_md;
__u32 fwmark = t->fwmark;
@@ -510,7 +519,7 @@ static size_t ipip_get_size(const struct net_device *dev)
static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct ip_tunnel_parm *parm = &tunnel->parms;
+ struct ip_tunnel_parm_kern *parm = &tunnel->parms;
if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index fd5c01c8489f..6c750bd13dd8 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -441,7 +441,7 @@ static bool ipmr_init_vif_indev(const struct net_device *dev)
static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
struct net_device *tunnel_dev, *new_dev;
- struct ip_tunnel_parm p = { };
+ struct ip_tunnel_parm_kern p = { };
int err;
tunnel_dev = __dev_get_by_name(net, "tunl0");
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 500f665f98cb..b61d36810fe3 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -462,7 +462,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
}
/* Try to redo what tcp_v4_send_synack did. */
- req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
+ req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? :
+ dst_metric(&rt->dst, RTAX_WINDOW);
/* limit the window selection if the user enforce a smaller rx buffer */
full_space = tcp_full_space(sk);
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e767721b3a58..e1bf468e0d22 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -290,6 +290,9 @@ enum {
DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
+DEFINE_PER_CPU(u32, tcp_tw_isn);
+EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn);
+
long sysctl_tcp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_mem);
@@ -1721,7 +1724,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
space = tcp_space_from_win(sk, val);
if (space > sk->sk_rcvbuf) {
WRITE_ONCE(sk->sk_rcvbuf, space);
- tcp_sk(sk)->window_clamp = val;
+ WRITE_ONCE(tcp_sk(sk)->window_clamp, val);
}
return 0;
}
@@ -3379,7 +3382,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
if (!val) {
if (sk->sk_state != TCP_CLOSE)
return -EINVAL;
- tp->window_clamp = 0;
+ WRITE_ONCE(tp->window_clamp, 0);
} else {
u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp;
u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
@@ -3388,7 +3391,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
if (new_window_clamp == old_window_clamp)
return 0;
- tp->window_clamp = new_window_clamp;
+ WRITE_ONCE(tp->window_clamp, new_window_clamp);
if (new_window_clamp < old_window_clamp) {
/* need to apply the reserved mem provisioning only
* when shrinking the window clamp
@@ -4057,7 +4060,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
TCP_RTO_MAX / HZ);
break;
case TCP_WINDOW_CLAMP:
- val = tp->window_clamp;
+ val = READ_ONCE(tp->window_clamp);
break;
case TCP_INFO: {
struct tcp_info info;
@@ -4648,16 +4651,16 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, lsndtime);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, mdev_us);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_wstamp_ns);
- CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_clock_cache);
- CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_mstamp);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, rtt_seq);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags);
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 105);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 89);
/* TXRX read-write hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, tcp_clock_cache);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, tcp_mstamp);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_nxt);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_nxt);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_una);
@@ -4670,7 +4673,11 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt);
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 76);
+
+ /* 32bit arches with 8byte alignment on u64 fields might need padding
+ * before tcp_clock_cache.
+ */
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 92 + 4);
/* RX read-write hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5d874817a78d..5a45a0923a1f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -563,19 +563,20 @@ static void tcp_init_buffer_space(struct sock *sk)
maxwin = tcp_full_space(sk);
if (tp->window_clamp >= maxwin) {
- tp->window_clamp = maxwin;
+ WRITE_ONCE(tp->window_clamp, maxwin);
if (tcp_app_win && maxwin > 4 * tp->advmss)
- tp->window_clamp = max(maxwin -
- (maxwin >> tcp_app_win),
- 4 * tp->advmss);
+ WRITE_ONCE(tp->window_clamp,
+ max(maxwin - (maxwin >> tcp_app_win),
+ 4 * tp->advmss));
}
/* Force reservation of one segment. */
if (tcp_app_win &&
tp->window_clamp > 2 * tp->advmss &&
tp->window_clamp + tp->advmss > maxwin)
- tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
+ WRITE_ONCE(tp->window_clamp,
+ max(2 * tp->advmss, maxwin - tp->advmss));
tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
tp->snd_cwnd_stamp = tcp_jiffies32;
@@ -773,7 +774,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
/* Make the window clamp follow along. */
- tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
+ WRITE_ONCE(tp->window_clamp,
+ tcp_win_from_space(sk, rcvbuf));
}
}
tp->rcvq_space.space = copied;
@@ -4803,10 +4805,8 @@ static bool tcp_try_coalesce(struct sock *sk,
if (!mptcp_skb_can_collapse(to, from))
return false;
-#ifdef CONFIG_TLS_DEVICE
- if (from->decrypted != to->decrypted)
+ if (skb_cmp_decrypted(from, to))
return false;
-#endif
if (!skb_try_coalesce(to, from, fragstolen, &delta))
return false;
@@ -5375,9 +5375,7 @@ restart:
break;
memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
-#ifdef CONFIG_TLS_DEVICE
- nskb->decrypted = skb->decrypted;
-#endif
+ skb_copy_decrypted(nskb, skb);
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
if (list)
__skb_queue_before(list, skb, nskb);
@@ -5407,10 +5405,8 @@ restart:
!mptcp_skb_can_collapse(nskb, skb) ||
(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
goto end;
-#ifdef CONFIG_TLS_DEVICE
- if (skb->decrypted != nskb->decrypted)
+ if (skb_cmp_decrypted(skb, nskb))
goto end;
-#endif
}
}
}
@@ -6426,7 +6422,8 @@ consume:
if (!tp->rx_opt.wscale_ok) {
tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
- tp->window_clamp = min(tp->window_clamp, 65535U);
+ WRITE_ONCE(tp->window_clamp,
+ min(tp->window_clamp, 65535U));
}
if (tp->rx_opt.saw_tstamp) {
@@ -6999,7 +6996,7 @@ EXPORT_SYMBOL(inet_reqsk_alloc);
/*
* Return true if a syncookie should be sent
*/
-static bool tcp_syn_flood_action(const struct sock *sk, const char *proto)
+static bool tcp_syn_flood_action(struct sock *sk, const char *proto)
{
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
const char *msg = "Dropping request";
@@ -7100,7 +7097,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct sock *sk, struct sk_buff *skb)
{
struct tcp_fastopen_cookie foc = { .len = -1 };
- __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
struct tcp_options_received tmp_opt;
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
@@ -7110,21 +7106,28 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct dst_entry *dst;
struct flowi fl;
u8 syncookies;
+ u32 isn;
#ifdef CONFIG_TCP_AO
const struct tcp_ao_hdr *aoh;
#endif
- syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
+ isn = __this_cpu_read(tcp_tw_isn);
+ if (isn) {
+ /* TW buckets are converted to open requests without
+ * limitations, they conserve resources and peer is
+ * evidently real one.
+ */
+ __this_cpu_write(tcp_tw_isn, 0);
+ } else {
+ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
- /* TW buckets are converted to open requests without
- * limitations, they conserve resources and peer is
- * evidently real one.
- */
- if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
- want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
- if (!want_cookie)
- goto drop;
+ if (syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) {
+ want_cookie = tcp_syn_flood_action(sk,
+ rsk_ops->slab_name);
+ if (!want_cookie)
+ goto drop;
+ }
}
if (sk_acceptq_is_full(sk)) {
@@ -7163,7 +7166,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
/* Note: tcp_v6_init_req() might override ir_iif for link locals */
inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
- dst = af_ops->route_req(sk, skb, &fl, req);
+ dst = af_ops->route_req(sk, skb, &fl, req, isn);
if (!dst)
goto drop_and_free;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a22ee5838751..1e650ec71d2f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -866,11 +866,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
* routing might fail in this case. No choice here, if we choose to force
* input interface, we will misroute in case of asymmetric route.
*/
- if (sk) {
+ if (sk)
arg.bound_dev_if = sk->sk_bound_dev_if;
- if (sk_fullsock(sk))
- trace_tcp_send_reset(sk, skb);
- }
+
+ trace_tcp_send_reset(sk, skb);
BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
offsetof(struct inet_timewait_sock, tw_bound_dev_if));
@@ -1667,7 +1666,8 @@ static void tcp_v4_init_req(struct request_sock *req,
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
- struct request_sock *req)
+ struct request_sock *req,
+ u32 tw_isn)
{
tcp_v4_init_req(req, sk, skb);
@@ -2045,10 +2045,8 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) ||
((TCP_SKB_CB(tail)->tcp_flags ^
TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
-#ifdef CONFIG_TLS_DEVICE
- tail->decrypted != skb->decrypted ||
-#endif
!mptcp_skb_can_collapse(tail, skb) ||
+ skb_cmp_decrypted(tail, skb) ||
thtail->doff != th->doff ||
memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
goto no_coalesce;
@@ -2148,7 +2146,6 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
skb->len - th->doff * 4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
- TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp =
@@ -2170,6 +2167,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
bool refcounted;
struct sock *sk;
int ret;
+ u32 isn;
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (skb->pkt_type != PACKET_HOST)
@@ -2385,7 +2383,7 @@ do_time_wait:
inet_twsk_put(inet_twsk(sk));
goto csum_error;
}
- switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
+ switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(net,
net->ipv4.tcp_death_row.hashinfo,
@@ -2399,6 +2397,7 @@ do_time_wait:
sk = sk2;
tcp_v4_restore_cb(skb);
refcounted = false;
+ __this_cpu_write(tcp_tw_isn, isn);
goto process;
}
}
@@ -3501,7 +3500,7 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
struct net *net;
- tcp_twsk_purge(net_exit_list, AF_INET);
+ tcp_twsk_purge(net_exit_list);
list_for_each_entry(net, net_exit_list, exit_list) {
inet_pernet_hashinfo_free(net->ipv4.tcp_death_row.hashinfo);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f0761f060a83..f53c7ada2ace 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -95,7 +95,7 @@ static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq)
*/
enum tcp_tw_status
tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
- const struct tcphdr *th)
+ const struct tcphdr *th, u32 *tw_isn)
{
struct tcp_options_received tmp_opt;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
@@ -228,7 +228,7 @@ kill:
u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
if (isn == 0)
isn++;
- TCP_SKB_CB(skb)->tcp_tw_isn = isn;
+ *tw_isn = isn;
return TCP_TW_SYN;
}
@@ -388,7 +388,7 @@ void tcp_twsk_destructor(struct sock *sk)
}
EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
-void tcp_twsk_purge(struct list_head *net_exit_list, int family)
+void tcp_twsk_purge(struct list_head *net_exit_list)
{
bool purged_once = false;
struct net *net;
@@ -396,9 +396,9 @@ void tcp_twsk_purge(struct list_head *net_exit_list, int family)
list_for_each_entry(net, net_exit_list, exit_list) {
if (net->ipv4.tcp_death_row.hashinfo->pernet) {
/* Even if tw_refcount == 1, we must clean up kernel reqsk */
- inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo, family);
+ inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo);
} else if (!purged_once) {
- inet_twsk_purge(&tcp_hashinfo, family);
+ inet_twsk_purge(&tcp_hashinfo);
purged_once = true;
}
}
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index ebe4722bb020..fab0973f995b 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -265,9 +265,7 @@ found:
flush |= (len - 1) >= mss;
flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
-#ifdef CONFIG_TLS_DEVICE
- flush |= p->decrypted ^ skb->decrypted;
-#endif
+ flush |= skb_cmp_decrypted(p, skb);
if (flush || skb_gro_receive(p, skb)) {
mss = 1;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e3167ad96567..9282fafc0e61 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -203,16 +203,17 @@ static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt)
* This MUST be enforced by all callers.
*/
void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
- __u32 *rcv_wnd, __u32 *window_clamp,
+ __u32 *rcv_wnd, __u32 *__window_clamp,
int wscale_ok, __u8 *rcv_wscale,
__u32 init_rcv_wnd)
{
unsigned int space = (__space < 0 ? 0 : __space);
+ u32 window_clamp = READ_ONCE(*__window_clamp);
/* If no clamp set the clamp to the max possible scaled window */
- if (*window_clamp == 0)
- (*window_clamp) = (U16_MAX << TCP_MAX_WSCALE);
- space = min(*window_clamp, space);
+ if (window_clamp == 0)
+ window_clamp = (U16_MAX << TCP_MAX_WSCALE);
+ space = min(window_clamp, space);
/* Quantize space offering to a multiple of mss if possible. */
if (space > mss)
@@ -239,12 +240,13 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
/* Set window scaling on max possible window */
space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
space = max_t(u32, space, READ_ONCE(sysctl_rmem_max));
- space = min_t(u32, space, *window_clamp);
+ space = min_t(u32, space, window_clamp);
*rcv_wscale = clamp_t(int, ilog2(space) - 15,
0, TCP_MAX_WSCALE);
}
/* Set the clamp no higher than max representable value */
- (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
+ WRITE_ONCE(*__window_clamp,
+ min_t(__u32, U16_MAX << (*rcv_wscale), window_clamp));
}
EXPORT_SYMBOL(tcp_select_initial_window);
@@ -3855,7 +3857,7 @@ static void tcp_connect_init(struct sock *sk)
tcp_ca_dst_init(sk, dst);
if (!tp->window_clamp)
- tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
+ WRITE_ONCE(tp->window_clamp, dst_metric(dst, RTAX_WINDOW));
tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
tcp_initialize_rcv_mss(sk);
@@ -3863,7 +3865,7 @@ static void tcp_connect_init(struct sock *sk)
/* limit the window selection if the user enforce a smaller rx buffer */
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
(tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
- tp->window_clamp = tcp_full_space(sk);
+ WRITE_ONCE(tp->window_clamp, tcp_full_space(sk));
rcv_wnd = tcp_rwnd_init_bpf(sk);
if (rcv_wnd == 0)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index d1ad20ce1c8c..976db57b95d4 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -25,7 +25,7 @@
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
u32 elapsed, user_timeout;
s32 remaining;
@@ -47,7 +47,7 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
u32 remaining, user_timeout;
s32 elapsed;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c02bf011d4a6..7613daa339b0 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1499,13 +1499,15 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
struct sk_buff_head *list = &sk->sk_receive_queue;
int rmem, err = -ENOMEM;
spinlock_t *busy = NULL;
- int size;
+ bool becomes_readable;
+ int size, rcvbuf;
- /* try to avoid the costly atomic add/sub pair when the receive
- * queue is full; always allow at least a packet
+ /* Immediately drop when the receive queue is full.
+ * Always allow at least one packet.
*/
rmem = atomic_read(&sk->sk_rmem_alloc);
- if (rmem > sk->sk_rcvbuf)
+ rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+ if (rmem > rcvbuf)
goto drop;
/* Under mem pressure, it might be helpful to help udp_recvmsg()
@@ -1514,7 +1516,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
* - Less cache line misses at copyout() time
* - Less work at consume_skb() (less alien page frag freeing)
*/
- if (rmem > (sk->sk_rcvbuf >> 1)) {
+ if (rmem > (rcvbuf >> 1)) {
skb_condense(skb);
busy = busylock_acquire(sk);
@@ -1522,12 +1524,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
size = skb->truesize;
udp_set_dev_scratch(skb);
- /* we drop only if the receive buf is full and the receive
- * queue contains some other skb
- */
- rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
- if (rmem > (size + (unsigned int)sk->sk_rcvbuf))
- goto uncharge_drop;
+ atomic_add(size, &sk->sk_rmem_alloc);
spin_lock(&list->lock);
err = udp_rmem_schedule(sk, size);
@@ -1543,12 +1540,19 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
*/
sock_skb_set_dropcount(sk, skb);
+ becomes_readable = skb_queue_empty(list);
__skb_queue_tail(list, skb);
spin_unlock(&list->lock);
- if (!sock_flag(sk, SOCK_DEAD))
- INDIRECT_CALL_1(sk->sk_data_ready, sock_def_readable, sk);
-
+ if (!sock_flag(sk, SOCK_DEAD)) {
+ if (becomes_readable ||
+ sk->sk_data_ready != sock_def_readable ||
+ READ_ONCE(sk->sk_peek_off) >= 0)
+ INDIRECT_CALL_1(sk->sk_data_ready,
+ sock_def_readable, sk);
+ else
+ sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN);
+ }
busylock_release(busy);
return 0;
@@ -2056,8 +2060,8 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
drop_reason = SKB_DROP_REASON_PROTO_MEM;
}
UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ trace_udp_fail_queue_rcv_skb(rc, sk, skb);
kfree_skb_reason(skb, drop_reason);
- trace_udp_fail_queue_rcv_skb(rc, sk);
return -1;
}
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index 860aff5f8599..e4e0fa869fa4 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -183,7 +183,8 @@ void udp_tunnel_sock_release(struct socket *sock)
EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family,
- __be16 flags, __be64 tunnel_id, int md_size)
+ const unsigned long *flags,
+ __be64 tunnel_id, int md_size)
{
struct metadata_dst *tun_dst;
struct ip_tunnel_info *info;
@@ -199,7 +200,7 @@ struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family,
info->key.tp_src = udp_hdr(skb)->source;
info->key.tp_dst = udp_hdr(skb)->dest;
if (udp_hdr(skb)->check)
- info->key.tun_flags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
return tun_dst;
}
EXPORT_SYMBOL_GPL(udp_tun_rx_dst);