From 8f4d19aacb64f2b3d65c8cf7974c3d153224b5f2 Mon Sep 17 00:00:00 2001
From: Gao Feng <gfree.wind@vip.163.com>
Date: Wed, 30 May 2018 10:29:31 +0800
Subject: netfilter: xt_CT: Reject the non-null terminated string from user
 space

The helper and timeout strings are from user-space, we need to make
sure they are null terminated. If not, evil user could make kernel
read the unexpected memory, even print it when fail to find by the
following codes.

pr_info_ratelimited("No such helper \"%s\"\n", helper_name);

Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_CT.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 8790190c6feb..03b9a50ec93b 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -245,12 +245,22 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
 	}
 
 	if (info->helper[0]) {
+		if (strnlen(info->helper, sizeof(info->helper)) == sizeof(info->helper)) {
+			ret = -ENAMETOOLONG;
+			goto err3;
+		}
+
 		ret = xt_ct_set_helper(ct, info->helper, par);
 		if (ret < 0)
 			goto err3;
 	}
 
 	if (info->timeout[0]) {
+		if (strnlen(info->timeout, sizeof(info->timeout)) == sizeof(info->timeout)) {
+			ret = -ENAMETOOLONG;
+			goto err4;
+		}
+
 		ret = xt_ct_set_timeout(ct, par, info->timeout);
 		if (ret < 0)
 			goto err4;
-- 
cgit v1.2.3


From 9c7f96fd77b0dbe1fe7ed1f9c462c45dc48a1076 Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Thu, 31 May 2018 19:53:33 +0300
Subject: netfilter: nf_tables: check msg_type before nft_trans_set(trans)

The patch moves the "trans->msg_type == NFT_MSG_NEWSET" check before
using nft_trans_set(trans). Otherwise we can get out of bounds read.

For example, KASAN reported the one when running 0001_cache_handling_0 nft
test. In this case "trans->msg_type" was NFT_MSG_NEWTABLE:

[75517.177808] BUG: KASAN: slab-out-of-bounds in nft_set_lookup_global+0x22f/0x270 [nf_tables]
[75517.279094] Read of size 8 at addr ffff881bdb643fc8 by task nft/7356
...
[75517.375605] CPU: 26 PID: 7356 Comm: nft Tainted: G  E   4.17.0-rc7.1.x86_64 #1
[75517.489587] Hardware name: Oracle Corporation SUN SERVER X4-2
[75517.618129] Call Trace:
[75517.648821]  dump_stack+0xd1/0x13b
[75517.691040]  ? show_regs_print_info+0x5/0x5
[75517.742519]  ? kmsg_dump_rewind_nolock+0xf5/0xf5
[75517.799300]  ? lock_acquire+0x143/0x310
[75517.846738]  print_address_description+0x85/0x3a0
[75517.904547]  kasan_report+0x18d/0x4b0
[75517.949892]  ? nft_set_lookup_global+0x22f/0x270 [nf_tables]
[75518.019153]  ? nft_set_lookup_global+0x22f/0x270 [nf_tables]
[75518.088420]  ? nft_set_lookup_global+0x22f/0x270 [nf_tables]
[75518.157689]  nft_set_lookup_global+0x22f/0x270 [nf_tables]
[75518.224869]  nf_tables_newsetelem+0x1a5/0x5d0 [nf_tables]
[75518.291024]  ? nft_add_set_elem+0x2280/0x2280 [nf_tables]
[75518.357154]  ? nla_parse+0x1a5/0x300
[75518.401455]  ? kasan_kmalloc+0xa6/0xd0
[75518.447842]  nfnetlink_rcv+0xc43/0x1bdf [nfnetlink]
[75518.507743]  ? nfnetlink_rcv+0x7a5/0x1bdf [nfnetlink]
[75518.569745]  ? nfnl_err_reset+0x3c0/0x3c0 [nfnetlink]
[75518.631711]  ? lock_acquire+0x143/0x310
[75518.679133]  ? netlink_deliver_tap+0x9b/0x1070
[75518.733840]  ? kasan_unpoison_shadow+0x31/0x40
[75518.788542]  netlink_unicast+0x45d/0x680
[75518.837111]  ? __isolate_free_page+0x890/0x890
[75518.891913]  ? netlink_attachskb+0x6b0/0x6b0
[75518.944542]  netlink_sendmsg+0x6fa/0xd30
[75518.993107]  ? netlink_unicast+0x680/0x680
[75519.043758]  ? netlink_unicast+0x680/0x680
[75519.094402]  sock_sendmsg+0xd9/0x160
[75519.138810]  ___sys_sendmsg+0x64d/0x980
[75519.186234]  ? copy_msghdr_from_user+0x350/0x350
[75519.243118]  ? lock_downgrade+0x650/0x650
[75519.292738]  ? do_raw_spin_unlock+0x5d/0x250
[75519.345456]  ? _raw_spin_unlock+0x24/0x30
[75519.395065]  ? __handle_mm_fault+0xbde/0x3410
[75519.448830]  ? sock_setsockopt+0x3d2/0x1940
[75519.500516]  ? __lock_acquire.isra.25+0xdc/0x19d0
[75519.558448]  ? lock_downgrade+0x650/0x650
[75519.608057]  ? __audit_syscall_entry+0x317/0x720
[75519.664960]  ? __fget_light+0x58/0x250
[75519.711325]  ? __sys_sendmsg+0xde/0x170
[75519.758850]  __sys_sendmsg+0xde/0x170
[75519.804193]  ? __ia32_sys_shutdown+0x90/0x90
[75519.856725]  ? syscall_trace_enter+0x897/0x10e0
[75519.912354]  ? trace_event_raw_event_sys_enter+0x920/0x920
[75519.979432]  ? __audit_syscall_entry+0x720/0x720
[75520.036118]  do_syscall_64+0xa3/0x3d0
[75520.081248]  ? prepare_exit_to_usermode+0x47/0x1d0
[75520.139904]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[75520.201680] RIP: 0033:0x7fc153320ba0
[75520.245772] RSP: 002b:00007ffe294c3638 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[75520.337708] RAX: ffffffffffffffda RBX: 00007ffe294c4820 RCX: 00007fc153320ba0
[75520.424547] RDX: 0000000000000000 RSI: 00007ffe294c46b0 RDI: 0000000000000003
[75520.511386] RBP: 00007ffe294c47b0 R08: 0000000000000004 R09: 0000000002114090
[75520.598225] R10: 00007ffe294c30a0 R11: 0000000000000246 R12: 00007ffe294c3660
[75520.684961] R13: 0000000000000001 R14: 00007ffe294c3650 R15: 0000000000000001

[75520.790946] Allocated by task 7356:
[75520.833994]  kasan_kmalloc+0xa6/0xd0
[75520.878088]  __kmalloc+0x189/0x450
[75520.920107]  nft_trans_alloc_gfp+0x20/0x190 [nf_tables]
[75520.983961]  nf_tables_newtable+0xcd0/0x1bd0 [nf_tables]
[75521.048857]  nfnetlink_rcv+0xc43/0x1bdf [nfnetlink]
[75521.108655]  netlink_unicast+0x45d/0x680
[75521.157013]  netlink_sendmsg+0x6fa/0xd30
[75521.205271]  sock_sendmsg+0xd9/0x160
[75521.249365]  ___sys_sendmsg+0x64d/0x980
[75521.296686]  __sys_sendmsg+0xde/0x170
[75521.341822]  do_syscall_64+0xa3/0x3d0
[75521.386957]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

[75521.467867] Freed by task 23454:
[75521.507804]  __kasan_slab_free+0x132/0x180
[75521.558137]  kfree+0x14d/0x4d0
[75521.596005]  free_rt_sched_group+0x153/0x280
[75521.648410]  sched_autogroup_create_attach+0x19a/0x520
[75521.711330]  ksys_setsid+0x2ba/0x400
[75521.755529]  __ia32_sys_setsid+0xa/0x10
[75521.802850]  do_syscall_64+0xa3/0x3d0
[75521.848090]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

[75521.929000] The buggy address belongs to the object at ffff881bdb643f80
 which belongs to the cache kmalloc-96 of size 96
[75522.079797] The buggy address is located 72 bytes inside of
 96-byte region [ffff881bdb643f80, ffff881bdb643fe0)
[75522.221234] The buggy address belongs to the page:
[75522.280100] page:ffffea006f6d90c0 count:1 mapcount:0 mapping:0000000000000000 index:0x0
[75522.377443] flags: 0x2fffff80000100(slab)
[75522.426956] raw: 002fffff80000100 0000000000000000 0000000000000000 0000000180200020
[75522.521275] raw: ffffea006e6fafc0 0000000c0000000c ffff881bf180f400 0000000000000000
[75522.615601] page dumped because: kasan: bad access detected

Fixes: 37a9cc525525 ("netfilter: nf_tables: add generation mask to sets")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 501e48a7965b..8d8dfe417014 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2728,12 +2728,13 @@ static struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
 	u32 id = ntohl(nla_get_be32(nla));
 
 	list_for_each_entry(trans, &net->nft.commit_list, list) {
-		struct nft_set *set = nft_trans_set(trans);
+		if (trans->msg_type == NFT_MSG_NEWSET) {
+			struct nft_set *set = nft_trans_set(trans);
 
-		if (trans->msg_type == NFT_MSG_NEWSET &&
-		    id == nft_trans_set_id(trans) &&
-		    nft_active_genmask(set, genmask))
-			return set;
+			if (id == nft_trans_set_id(trans) &&
+			    nft_active_genmask(set, genmask))
+				return set;
+		}
 	}
 	return ERR_PTR(-ENOENT);
 }
-- 
cgit v1.2.3


From 31875d4970baa02e08b719fdfea6f43e9e2f7e77 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Thu, 24 May 2018 23:40:12 +0300
Subject: ipvs: register conntrack hooks for ftp

ip_vs_ftp requires conntrack modules for mangling
of FTP command responses in passive mode.

Make sure the conntrack hooks are registered when
real servers use NAT method in FTP virtual service.
The hooks will be registered while the service is
present.

Fixes: 0c66dc1ea3f0 ("netfilter: conntrack: register hooks in netns when needed by ruleset")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/ip_vs.h            | 30 ++++++++++++++++++++++++++++++
 net/netfilter/ipvs/ip_vs_ctl.c |  4 ++++
 2 files changed, 34 insertions(+)

(limited to 'net')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index eb0bec043c96..ae72d9057eda 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -643,6 +643,7 @@ struct ip_vs_service {
 
 	/* alternate persistence engine */
 	struct ip_vs_pe __rcu	*pe;
+	int			conntrack_afmask;
 
 	struct rcu_head		rcu_head;
 };
@@ -1620,6 +1621,35 @@ static inline bool ip_vs_conn_uses_conntrack(struct ip_vs_conn *cp,
 	return false;
 }
 
+static inline int ip_vs_register_conntrack(struct ip_vs_service *svc)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	int afmask = (svc->af == AF_INET6) ? 2 : 1;
+	int ret = 0;
+
+	if (!(svc->conntrack_afmask & afmask)) {
+		ret = nf_ct_netns_get(svc->ipvs->net, svc->af);
+		if (ret >= 0)
+			svc->conntrack_afmask |= afmask;
+	}
+	return ret;
+#else
+	return 0;
+#endif
+}
+
+static inline void ip_vs_unregister_conntrack(struct ip_vs_service *svc)
+{
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	int afmask = (svc->af == AF_INET6) ? 2 : 1;
+
+	if (svc->conntrack_afmask & afmask) {
+		nf_ct_netns_put(svc->ipvs->net, svc->af);
+		svc->conntrack_afmask &= ~afmask;
+	}
+#endif
+}
+
 static inline int
 ip_vs_dest_conn_overhead(struct ip_vs_dest *dest)
 {
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 3ecca0616d8c..ee0ab278f1f1 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -835,6 +835,9 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 		 *    For now only for NAT!
 		 */
 		ip_vs_rs_hash(ipvs, dest);
+		/* FTP-NAT requires conntrack for mangling */
+		if (svc->port == FTPPORT)
+			ip_vs_register_conntrack(svc);
 	}
 	atomic_set(&dest->conn_flags, conn_flags);
 
@@ -1458,6 +1461,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
  */
 static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
 {
+	ip_vs_unregister_conntrack(svc);
 	/* Hold svc to avoid double release from dest_trash */
 	atomic_inc(&svc->refcnt);
 	/*
-- 
cgit v1.2.3


From 0cafa3926f0d8d72a2a814843f4db8cfef66d4ce Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Fri, 1 Jun 2018 19:12:28 +0900
Subject: netfilter: nft_reject_bridge: fix skb allocation size in
 nft_reject_br_send_v6_unreach

In order to allocate icmpv6 skb, sizeof(struct ipv6hdr) should be used.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/nft_reject_bridge.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index eaf05de37f75..6de981270566 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -261,7 +261,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net,
 	if (!reject6_br_csum_ok(oldskb, hook))
 		return;
 
-	nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmp6hdr) +
+	nskb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) +
 			 LL_MAX_HEADER + len, GFP_ATOMIC);
 	if (!nskb)
 		return;
-- 
cgit v1.2.3


From 6fcc02e3c2bddeaf628fde3c6a5ab3216d45691a Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 2 Jun 2018 21:52:15 +0300
Subject: ipvs: fix check on xmit to non-local addresses

There is mistake in the rt_mode_allow_non_local assignment.
It should be used to check if sending to non-local addresses is
allowed, now it checks if local addresses are allowed.

As local addresses are allowed for most of the cases, the only
places that are affected are for traffic to transparent cache
servers:

- bypass connections when cache server is not available
- related ICMP in FORWARD hook when sent to cache server

Fixes: 4a4739d56b00 ("ipvs: Pull out crosses_local_route_boundary logic")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_xmit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 4527921b1c3a..8f7fff774283 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -168,7 +168,7 @@ static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb,
 						bool new_rt_is_local)
 {
 	bool rt_mode_allow_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL);
-	bool rt_mode_allow_non_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL);
+	bool rt_mode_allow_non_local = !!(rt_mode & IP_VS_RT_MODE_NON_LOCAL);
 	bool rt_mode_allow_redirect = !!(rt_mode & IP_VS_RT_MODE_RDR);
 	bool source_is_loopback;
 	bool old_rt_is_local;
-- 
cgit v1.2.3


From 9e8c8dabb78e886ace989729e763d28c76f5169e Mon Sep 17 00:00:00 2001
From: Alin Nastac <alin.nastac@gmail.com>
Date: Wed, 30 May 2018 15:19:36 +0200
Subject: netfilter: ebtables: fix compat entry padding

On arm64, ebt_entry_{match,watcher,target} structs are 40 bytes long
while on 32-bit arm these structs have a size of 36 bytes.

COMPAT_XT_ALIGN() macro cannot be used here to determine the necessary
padding for the CONFIG_COMPAT because it imposes an 8-byte boundary
alignment, condition that is not found in 32-bit ebtables application.

Signed-off-by: Alin Nastac <alin.nastac@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/ebtables.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 6ba639f6c51d..5f459c8b7937 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1610,16 +1610,16 @@ struct compat_ebt_entry_mwt {
 		compat_uptr_t ptr;
 	} u;
 	compat_uint_t match_size;
-	compat_uint_t data[0];
+	compat_uint_t data[0] __attribute__ ((aligned (__alignof__(struct compat_ebt_replace))));
 };
 
 /* account for possible padding between match_size and ->data */
 static int ebt_compat_entry_padsize(void)
 {
-	BUILD_BUG_ON(XT_ALIGN(sizeof(struct ebt_entry_match)) <
-			COMPAT_XT_ALIGN(sizeof(struct compat_ebt_entry_mwt)));
-	return (int) XT_ALIGN(sizeof(struct ebt_entry_match)) -
-			COMPAT_XT_ALIGN(sizeof(struct compat_ebt_entry_mwt));
+	BUILD_BUG_ON(sizeof(struct ebt_entry_match) <
+			sizeof(struct compat_ebt_entry_mwt));
+	return (int) sizeof(struct ebt_entry_match) -
+			sizeof(struct compat_ebt_entry_mwt);
 }
 
 static int ebt_compat_match_offset(const struct xt_match *match,
-- 
cgit v1.2.3


From 9dcceb1378b6d66633f613805b2d5a22af4d5383 Mon Sep 17 00:00:00 2001
From: Serhey Popovych <serhe.popovych@gmail.com>
Date: Tue, 5 Jun 2018 11:46:13 +0200
Subject: netfilter: xt_set: Check hook mask correctly

Inserting rule before one with SET target we get error with warning in
dmesg(1) output:

  # iptables -A FORWARD -t mangle -j SET --map-set test src --map-prio
  # iptables -I FORWARD 1 -t mangle -j ACCEPT
  iptables: Invalid argument. Run `dmesg' for more information.
  # dmesg |tail -n1
  [268578.026643] mapping of prio or/and queue is allowed only from \
  OUTPUT/FORWARD/POSTROUTING chains

Rather than checking for supported hook bits for SET target check for
unsupported one as done in all rest of matches and targets.

Signed-off-by: Serhey Popovych <serhe.popovych@gmail.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 net/netfilter/xt_set.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 6f4c5217d835..07af7dbf7a30 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -470,7 +470,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
 		}
 		if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) |
 		     (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) &&
-		     !(par->hook_mask & (1 << NF_INET_FORWARD |
+		     (par->hook_mask & ~(1 << NF_INET_FORWARD |
 					 1 << NF_INET_LOCAL_OUT |
 					 1 << NF_INET_POST_ROUTING))) {
 			pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n");
-- 
cgit v1.2.3


From 30a2e107108c66cbcb7776b58cbcd7db223a1cc9 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Tue, 5 Jun 2018 11:53:35 +0200
Subject: netfilter: ipset: Limit max timeout value

Due to the negative value condition in msecs_to_jiffies(), the real
max possible timeout value must be set to (UINT_MAX >> 1)/MSEC_PER_SEC.

Neutron Soutmun proposed the proper fix, but an insufficient one was
applied, see https://patchwork.ozlabs.org/patch/400405/.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set_timeout.h | 10 ++++++----
 net/netfilter/xt_set.c                         |  8 ++++----
 2 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h
index 7ad8ddf9ca8a..8ce271e187b6 100644
--- a/include/linux/netfilter/ipset/ip_set_timeout.h
+++ b/include/linux/netfilter/ipset/ip_set_timeout.h
@@ -23,6 +23,9 @@
 /* Set is defined with timeout support: timeout value may be 0 */
 #define IPSET_NO_TIMEOUT	UINT_MAX
 
+/* Max timeout value, see msecs_to_jiffies() in jiffies.h */
+#define IPSET_MAX_TIMEOUT	(UINT_MAX >> 1)/MSEC_PER_SEC
+
 #define ip_set_adt_opt_timeout(opt, set)	\
 ((opt)->ext.timeout != IPSET_NO_TIMEOUT ? (opt)->ext.timeout : (set)->timeout)
 
@@ -32,11 +35,10 @@ ip_set_timeout_uget(struct nlattr *tb)
 	unsigned int timeout = ip_set_get_h32(tb);
 
 	/* Normalize to fit into jiffies */
-	if (timeout > UINT_MAX/MSEC_PER_SEC)
-		timeout = UINT_MAX/MSEC_PER_SEC;
+	if (timeout > IPSET_MAX_TIMEOUT)
+		timeout = IPSET_MAX_TIMEOUT;
 
-	/* Userspace supplied TIMEOUT parameter: adjust crazy size */
-	return timeout == IPSET_NO_TIMEOUT ? IPSET_NO_TIMEOUT - 1 : timeout;
+	return timeout;
 }
 
 static inline bool
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 07af7dbf7a30..bf2890b13212 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -372,8 +372,8 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
 
 	/* Normalize to fit into jiffies */
 	if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
-	    add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC)
-		add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC;
+	    add_opt.ext.timeout > IPSET_MAX_TIMEOUT)
+		add_opt.ext.timeout = IPSET_MAX_TIMEOUT;
 	if (info->add_set.index != IPSET_INVALID_ID)
 		ip_set_add(info->add_set.index, skb, par, &add_opt);
 	if (info->del_set.index != IPSET_INVALID_ID)
@@ -407,8 +407,8 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
 
 	/* Normalize to fit into jiffies */
 	if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
-	    add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC)
-		add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC;
+	    add_opt.ext.timeout > IPSET_MAX_TIMEOUT)
+		add_opt.ext.timeout = IPSET_MAX_TIMEOUT;
 	if (info->add_set.index != IPSET_INVALID_ID)
 		ip_set_add(info->add_set.index, skb, par, &add_opt);
 	if (info->del_set.index != IPSET_INVALID_ID)
-- 
cgit v1.2.3


From cbdebe481a14b42c45aa9f4ceb5ff19b55de2c57 Mon Sep 17 00:00:00 2001
From: Florent Fourcot <florent.fourcot@wifirst.fr>
Date: Mon, 4 Jun 2018 16:51:19 +0200
Subject: netfilter: ipset: forbid family for hash:mac sets

Userspace `ipset` command forbids family option for hash:mac type:

ipset create test hash:mac family inet4
ipset v6.30: Unknown argument: `family'

However, this check is not done in kernel itself. When someone use
external netlink applications (pyroute2 python library for example), one
can create hash:mac with invalid family and inconsistant results from
userspace (`ipset` command cannot read set content anymore).

This patch enforce the logic in kernel, and forbids insertion of
hash:mac with a family set.

Since IP_SET_PROTO_UNDEF is defined only for hash:mac, this patch has no
impact on other hash:* sets

Signed-off-by: Florent Fourcot <florent.fourcot@wifirst.fr>
Signed-off-by: Victorien Molle <victorien.molle@wifirst.fr>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 net/netfilter/ipset/ip_set_hash_gen.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index bbad940c0137..8a33dac4e805 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -1234,7 +1234,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
 	pr_debug("Create set %s with family %s\n",
 		 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
 
-#ifndef IP_SET_PROTO_UNDEF
+#ifdef IP_SET_PROTO_UNDEF
+	if (set->family != NFPROTO_UNSPEC)
+		return -IPSET_ERR_INVALID_FAMILY;
+#else
 	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
 		return -IPSET_ERR_INVALID_FAMILY;
 #endif
-- 
cgit v1.2.3


From 11ff7288beb2b7da889a014aff0a7b80bf8efcf3 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 6 Jun 2018 12:14:56 +0200
Subject: netfilter: ebtables: reject non-bridge targets

the ebtables evaluation loop expects targets to return
positive values (jumps), or negative values (absolute verdicts).

This is completely different from what xtables does.
In xtables, targets are expected to return the standard netfilter
verdicts, i.e. NF_DROP, NF_ACCEPT, etc.

ebtables will consider these as jumps.

Therefore reject any target found due to unspec fallback.
v2: also reject watchers.  ebtables ignores their return value, so
a target that assumes skb ownership (and returns NF_STOLEN) causes
use-after-free.

The only watchers in the 'ebtables' front-end are log and nflog;
both have AF_BRIDGE specific wrappers on kernel side.

Reported-by: syzbot+2b43f681169a2a0d306a@syzkaller.appspotmail.com
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/ebtables.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 5f459c8b7937..08a65e4a77d0 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -396,6 +396,12 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par,
 	watcher = xt_request_find_target(NFPROTO_BRIDGE, w->u.name, 0);
 	if (IS_ERR(watcher))
 		return PTR_ERR(watcher);
+
+	if (watcher->family != NFPROTO_BRIDGE) {
+		module_put(watcher->me);
+		return -ENOENT;
+	}
+
 	w->u.watcher = watcher;
 
 	par->target   = watcher;
@@ -715,6 +721,13 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
 		goto cleanup_watchers;
 	}
 
+	/* Reject UNSPEC, xtables verdicts/return values are incompatible */
+	if (target->family != NFPROTO_BRIDGE) {
+		module_put(target->me);
+		ret = -ENOENT;
+		goto cleanup_watchers;
+	}
+
 	t->u.target = target;
 	if (t->u.target == &ebt_standard_target) {
 		if (gap < sizeof(struct ebt_standard_target)) {
-- 
cgit v1.2.3


From 82e20b44477ffe90a5866caa209ecc9df818c6a1 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Thu, 7 Jun 2018 02:05:12 +0900
Subject: netfilter: nft_set_rbtree: fix parameter of __nft_rbtree_lookup()

The parameter this doesn't have a flags value. so that it can't be
used by nft_rbtree_interval_end().

test commands:
   %nft add table ip filter
   %nft add set ip filter s { type ipv4_addr \; flags interval \; }
   %nft add element ip filter s {0-1}
   %nft add element ip filter s {2-10}
   %nft add chain ip filter input { type filter hook input priority 0\; }
   %nft add rule ip filter input ip saddr @s

Splat looks like:
[  246.752502] BUG: KASAN: slab-out-of-bounds in __nft_rbtree_lookup+0x677/0x6a0 [nft_set_rbtree]
[  246.752502] Read of size 1 at addr ffff88010d9efa47 by task http/1092

[  246.752502] CPU: 1 PID: 1092 Comm: http Not tainted 4.17.0-rc6+ #185
[  246.752502] Call Trace:
[  246.752502]  <IRQ>
[  246.752502]  dump_stack+0x74/0xbb
[  246.752502]  ? __nft_rbtree_lookup+0x677/0x6a0 [nft_set_rbtree]
[  246.752502]  print_address_description+0xc7/0x290
[  246.752502]  ? __nft_rbtree_lookup+0x677/0x6a0 [nft_set_rbtree]
[  246.752502]  kasan_report+0x22c/0x350
[  246.752502]  __nft_rbtree_lookup+0x677/0x6a0 [nft_set_rbtree]
[  246.752502]  nft_rbtree_lookup+0xc9/0x2d2 [nft_set_rbtree]
[  246.752502]  ? sched_clock_cpu+0x144/0x180
[  246.752502]  nft_lookup_eval+0x149/0x3a0 [nf_tables]
[  246.752502]  ? __lock_acquire+0xcea/0x4ed0
[  246.752502]  ? nft_lookup_init+0x6b0/0x6b0 [nf_tables]
[  246.752502]  nft_do_chain+0x263/0xf50 [nf_tables]
[  246.752502]  ? __nft_trace_packet+0x1a0/0x1a0 [nf_tables]
[  246.752502]  ? sched_clock_cpu+0x144/0x180
[ ... ]

Fixes: f9121355eb6f ("netfilter: nft_set_rbtree: incorrect assumption on lower interval lookups")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_set_rbtree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index e6f08bc5f359..26fa93b23805 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -65,7 +65,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
 			parent = rcu_dereference_raw(parent->rb_left);
 			if (interval &&
 			    nft_rbtree_equal(set, this, interval) &&
-			    nft_rbtree_interval_end(this) &&
+			    nft_rbtree_interval_end(rbe) &&
 			    !nft_rbtree_interval_end(interval))
 				continue;
 			interval = rbe;
-- 
cgit v1.2.3


From c568503ef02030f169c9e19204def610a3510918 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 7 Jun 2018 21:34:43 +0200
Subject: netfilter: x_tables: initialise match/target check parameter struct

syzbot reports following splat:

BUG: KMSAN: uninit-value in ebt_stp_mt_check+0x24b/0x450
 net/bridge/netfilter/ebt_stp.c:162
 ebt_stp_mt_check+0x24b/0x450 net/bridge/netfilter/ebt_stp.c:162
 xt_check_match+0x1438/0x1650 net/netfilter/x_tables.c:506
 ebt_check_match net/bridge/netfilter/ebtables.c:372 [inline]
 ebt_check_entry net/bridge/netfilter/ebtables.c:702 [inline]

The uninitialised access is
   xt_mtchk_param->nft_compat

... which should be set to 0.
Fix it by zeroing the struct beforehand, same for tgchk.

ip(6)tables targetinfo uses c99-style initialiser, so no change
needed there.

Reported-by: syzbot+da4494182233c23a5fcf@syzkaller.appspotmail.com
Fixes: 55917a21d0cc0 ("netfilter: x_tables: add context to know if extension runs from nft_compat")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/ebtables.c | 2 ++
 net/ipv4/netfilter/ip_tables.c  | 1 +
 net/ipv6/netfilter/ip6_tables.c | 1 +
 3 files changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 08a65e4a77d0..ead123dab05e 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -700,6 +700,8 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
 	}
 	i = 0;
 
+	memset(&mtpar, 0, sizeof(mtpar));
+	memset(&tgpar, 0, sizeof(tgpar));
 	mtpar.net	= tgpar.net       = net;
 	mtpar.table     = tgpar.table     = name;
 	mtpar.entryinfo = tgpar.entryinfo = e;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e85f35b89c49..f6130704f052 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -531,6 +531,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 		return -ENOMEM;
 
 	j = 0;
+	memset(&mtpar, 0, sizeof(mtpar));
 	mtpar.net	= net;
 	mtpar.table     = name;
 	mtpar.entryinfo = &e->ip;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 97f79dc943d7..685c2168f524 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -551,6 +551,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
 		return -ENOMEM;
 
 	j = 0;
+	memset(&mtpar, 0, sizeof(mtpar));
 	mtpar.net	= net;
 	mtpar.table     = name;
 	mtpar.entryinfo = &e->ipv6;
-- 
cgit v1.2.3


From 0975764684487bf3f7a47eef009e750ea41bd514 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Mon, 11 Jun 2018 02:02:54 +0300
Subject: ipv6: allow PMTU exceptions to local routes

IPVS setups with local client and remote tunnel server need
to create exception for the local virtual IP. What we do is to
change PMTU from 64KB (on "lo") to 1460 in the common case.

Suggested-by: Martin KaFai Lau <kafai@fb.com>
Fixes: 45e4fd26683c ("ipv6: Only create RTF_CACHE routes after encountering pmtu exception")
Fixes: 7343ff31ebf0 ("ipv6: Don't create clones of host routes.")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: David Ahern <dsahern@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fb956989adaf..86a0e4333d42 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2307,9 +2307,6 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 	const struct in6_addr *daddr, *saddr;
 	struct rt6_info *rt6 = (struct rt6_info *)dst;
 
-	if (rt6->rt6i_flags & RTF_LOCAL)
-		return;
-
 	if (dst_metric_locked(dst, RTAX_MTU))
 		return;
 
-- 
cgit v1.2.3


From 349b71d6f427ff8211adf50839dbbff3f27c1805 Mon Sep 17 00:00:00 2001
From: Zhouyang Jia <jiazhouyang09@gmail.com>
Date: Mon, 11 Jun 2018 13:26:35 +0800
Subject: net: dsa: add error handling for pskb_trim_rcsum

When pskb_trim_rcsum fails, the lack of error-handling code may
cause unexpected results.

This patch adds error-handling code after calling pskb_trim_rcsum.

Signed-off-by: Zhouyang Jia <jiazhouyang09@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/tag_trailer.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 7d20e1f3de28..56197f0d9608 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -75,7 +75,8 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (!skb->dev)
 		return NULL;
 
-	pskb_trim_rcsum(skb, skb->len - 4);
+	if (pskb_trim_rcsum(skb, skb->len - 4))
+		return NULL;
 
 	return skb;
 }
-- 
cgit v1.2.3


From a343993c518ce252b62ec00ac06bccfb1d17129d Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Mon, 11 Jun 2018 13:57:12 +0200
Subject: xsk: silence warning on memory allocation failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

syzkaller reported a warning from xdp_umem_pin_pages():

  WARNING: CPU: 1 PID: 4537 at mm/slab_common.c:996 kmalloc_slab+0x56/0x70 mm/slab_common.c:996
  ...
  __do_kmalloc mm/slab.c:3713 [inline]
  __kmalloc+0x25/0x760 mm/slab.c:3727
  kmalloc_array include/linux/slab.h:634 [inline]
  kcalloc include/linux/slab.h:645 [inline]
  xdp_umem_pin_pages net/xdp/xdp_umem.c:205 [inline]
  xdp_umem_reg net/xdp/xdp_umem.c:318 [inline]
  xdp_umem_create+0x5c9/0x10f0 net/xdp/xdp_umem.c:349
  xsk_setsockopt+0x443/0x550 net/xdp/xsk.c:531
  __sys_setsockopt+0x1bd/0x390 net/socket.c:1935
  __do_sys_setsockopt net/socket.c:1946 [inline]
  __se_sys_setsockopt net/socket.c:1943 [inline]
  __x64_sys_setsockopt+0xbe/0x150 net/socket.c:1943
  do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
  entry_SYSCALL_64_after_hwframe+0x49/0xbe

This is a warning about attempting to allocate more than
KMALLOC_MAX_SIZE memory. The request originates from userspace, and if
the request is too big, the kernel is free to deny its allocation. In
this patch, the failed allocation attempt is silenced with
__GFP_NOWARN.

Fixes: c0c77d8fb787 ("xsk: add user memory registration support sockopt")
Reported-by: syzbot+4abadc5d69117b346506@syzkaller.appspotmail.com
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/xdp/xdp_umem.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index b9ef487c4618..f47abb46c587 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -204,7 +204,8 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem)
 	long npgs;
 	int err;
 
-	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL);
+	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
+			    GFP_KERNEL | __GFP_NOWARN);
 	if (!umem->pgs)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From f6fadff33e8b09373eedf99822b89d9dd84545b8 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 11 Jun 2018 23:22:04 +0200
Subject: tls: fix NULL pointer dereference on poll

While hacking on kTLS, I ran into the following panic from an
unprivileged netserver / netperf TCP session:

  BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
  PGD 800000037f378067 P4D 800000037f378067 PUD 3c0e61067 PMD 0
  Oops: 0010 [#1] SMP KASAN PTI
  CPU: 1 PID: 2289 Comm: netserver Not tainted 4.17.0+ #139
  Hardware name: LENOVO 20FBCTO1WW/20FBCTO1WW, BIOS N1FET47W (1.21 ) 11/28/2016
  RIP: 0010:          (null)
  Code: Bad RIP value.
  RSP: 0018:ffff88036abcf740 EFLAGS: 00010246
  RAX: dffffc0000000000 RBX: ffff88036f5f6800 RCX: 1ffff1006debed26
  RDX: ffff88036abcf920 RSI: ffff8803cb1a4f00 RDI: ffff8803c258c280
  RBP: ffff8803c258c280 R08: ffff8803c258c280 R09: ffffed006f559d48
  R10: ffff88037aacea43 R11: ffffed006f559d49 R12: ffff8803c258c280
  R13: ffff8803cb1a4f20 R14: 00000000000000db R15: ffffffffc168a350
  FS:  00007f7e631f4700(0000) GS:ffff8803d1c80000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: ffffffffffffffd6 CR3: 00000003ccf64005 CR4: 00000000003606e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   ? tls_sw_poll+0xa4/0x160 [tls]
   ? sock_poll+0x20a/0x680
   ? do_select+0x77b/0x11a0
   ? poll_schedule_timeout.constprop.12+0x130/0x130
   ? pick_link+0xb00/0xb00
   ? read_word_at_a_time+0x13/0x20
   ? vfs_poll+0x270/0x270
   ? deref_stack_reg+0xad/0xe0
   ? __read_once_size_nocheck.constprop.6+0x10/0x10
  [...]

Debugging further, it turns out that calling into ctx->sk_poll() is
invalid since sk_poll itself is NULL which was saved from the original
TCP socket in order for tls_sw_poll() to invoke it.

Looks like the recent conversion from poll to poll_mask callback started
in 152524231023 ("net: add support for ->poll_mask in proto_ops") missed
to eventually convert kTLS, too: TCP's ->poll was converted over to the
->poll_mask in commit 2c7d3dacebd4 ("net/tcp: convert to ->poll_mask")
and therefore kTLS wrongly saved the ->poll old one which is now NULL.

Convert kTLS over to use ->poll_mask instead. Also instead of POLLIN |
POLLRDNORM use the proper EPOLLIN | EPOLLRDNORM bits as the case in
tcp_poll_mask() as well that is mangled here.

Fixes: 2c7d3dacebd4 ("net/tcp: convert to ->poll_mask")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Watson <davejwatson@fb.com>
Tested-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h  |  6 ++----
 net/tls/tls_main.c |  2 +-
 net/tls/tls_sw.c   | 19 +++++++++----------
 3 files changed, 12 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/include/net/tls.h b/include/net/tls.h
index 70c273777fe9..7f84ea3e217c 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -109,8 +109,7 @@ struct tls_sw_context_rx {
 
 	struct strparser strp;
 	void (*saved_data_ready)(struct sock *sk);
-	unsigned int (*sk_poll)(struct file *file, struct socket *sock,
-				struct poll_table_struct *wait);
+	__poll_t (*sk_poll_mask)(struct socket *sock, __poll_t events);
 	struct sk_buff *recv_pkt;
 	u8 control;
 	bool decrypted;
@@ -225,8 +224,7 @@ void tls_sw_free_resources_tx(struct sock *sk);
 void tls_sw_free_resources_rx(struct sock *sk);
 int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		   int nonblock, int flags, int *addr_len);
-unsigned int tls_sw_poll(struct file *file, struct socket *sock,
-			 struct poll_table_struct *wait);
+__poll_t tls_sw_poll_mask(struct socket *sock, __poll_t events);
 ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
 			   struct pipe_inode_info *pipe,
 			   size_t len, unsigned int flags);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 301f22430469..a127d61e8af9 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -712,7 +712,7 @@ static int __init tls_register(void)
 	build_protos(tls_prots[TLSV4], &tcp_prot);
 
 	tls_sw_proto_ops = inet_stream_ops;
-	tls_sw_proto_ops.poll = tls_sw_poll;
+	tls_sw_proto_ops.poll_mask = tls_sw_poll_mask;
 	tls_sw_proto_ops.splice_read = tls_sw_splice_read;
 
 #ifdef CONFIG_TLS_DEVICE
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 8ca57d01b18f..34895b7c132d 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -915,23 +915,22 @@ splice_read_end:
 	return copied ? : err;
 }
 
-unsigned int tls_sw_poll(struct file *file, struct socket *sock,
-			 struct poll_table_struct *wait)
+__poll_t tls_sw_poll_mask(struct socket *sock, __poll_t events)
 {
-	unsigned int ret;
 	struct sock *sk = sock->sk;
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	__poll_t mask;
 
-	/* Grab POLLOUT and POLLHUP from the underlying socket */
-	ret = ctx->sk_poll(file, sock, wait);
+	/* Grab EPOLLOUT and EPOLLHUP from the underlying socket */
+	mask = ctx->sk_poll_mask(sock, events);
 
-	/* Clear POLLIN bits, and set based on recv_pkt */
-	ret &= ~(POLLIN | POLLRDNORM);
+	/* Clear EPOLLIN bits, and set based on recv_pkt */
+	mask &= ~(EPOLLIN | EPOLLRDNORM);
 	if (ctx->recv_pkt)
-		ret |= POLLIN | POLLRDNORM;
+		mask |= EPOLLIN | EPOLLRDNORM;
 
-	return ret;
+	return mask;
 }
 
 static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
@@ -1188,7 +1187,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		sk->sk_data_ready = tls_data_ready;
 		write_unlock_bh(&sk->sk_callback_lock);
 
-		sw_ctx_rx->sk_poll = sk->sk_socket->ops->poll;
+		sw_ctx_rx->sk_poll_mask = sk->sk_socket->ops->poll_mask;
 
 		strp_check_rcv(&sw_ctx_rx->strp);
 	}
-- 
cgit v1.2.3


From 3f2d67b6bd24c615cfe3a6d793613bb2838dd9b9 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 11 Jun 2018 07:12:12 -0700
Subject: net/ipv6: Ensure cfg is properly initialized in ipv6_create_tempaddr

Valdis reported a BUG in ipv6_add_addr:

[ 1820.832682] BUG: unable to handle kernel NULL pointer dereference at 0000000000000209
[ 1820.832728] RIP: 0010:ipv6_add_addr+0x280/0xd10
[ 1820.832732] Code: 49 8b 1f 0f 84 6a 0a 00 00 48 85 db 0f 84 4e 0a 00 00 48 8b 03 48 8b 53 08 49 89 45 00 49 8b 47 10
49 89 55 08 48 85 c0 74 15 <48> 8b 50 08 48 8b 00 49 89 95 b8 01 00 00 49 89 85 b0 01 00 00 4c
[ 1820.832847] RSP: 0018:ffffaa07c2fd7880 EFLAGS: 00010202
[ 1820.832853] RAX: 0000000000000201 RBX: ffffaa07c2fd79b0 RCX: 0000000000000000
[ 1820.832858] RDX: a4cfbfba2cbfa64c RSI: 0000000000000000 RDI: ffffffff8a8e9fa0
[ 1820.832862] RBP: ffffaa07c2fd7920 R08: 000000000000017a R09: ffffffff8a555300
[ 1820.832866] R10: 0000000000000000 R11: 0000000000000000 R12: ffff888d18e71c00
[ 1820.832871] R13: ffff888d0a9b1200 R14: 0000000000000000 R15: ffffaa07c2fd7980
[ 1820.832876] FS:  00007faa51bdb800(0000) GS:ffff888d1d400000(0000) knlGS:0000000000000000
[ 1820.832880] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1820.832885] CR2: 0000000000000209 CR3: 000000021e8f8001 CR4: 00000000001606e0
[ 1820.832888] Call Trace:
[ 1820.832898]  ? __local_bh_enable_ip+0x119/0x260
[ 1820.832904]  ? ipv6_create_tempaddr+0x259/0x5a0
[ 1820.832912]  ? __local_bh_enable_ip+0x139/0x260
[ 1820.832921]  ipv6_create_tempaddr+0x2da/0x5a0
[ 1820.832926]  ? ipv6_create_tempaddr+0x2da/0x5a0
[ 1820.832941]  manage_tempaddrs+0x1a5/0x240
[ 1820.832951]  inet6_addr_del+0x20b/0x3b0
[ 1820.832959]  ? nla_parse+0xce/0x1e0
[ 1820.832968]  inet6_rtm_deladdr+0xd9/0x210
[ 1820.832981]  rtnetlink_rcv_msg+0x1d4/0x5f0

Looking at the code I found 1 element (peer_pfx) of the newly introduced
ifa6_config struct that is not initialized. Use a memset rather than hard
coding an init for each struct element.

Reported-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Fixes: e6464b8c63619 ("net/ipv6: Convert ipv6_add_addr to struct ifa6_config")
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 89019bf59f46..c134286d6a41 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1324,6 +1324,7 @@ retry:
 		}
 	}
 
+	memset(&cfg, 0, sizeof(cfg));
 	cfg.valid_lft = min_t(__u32, ifp->valid_lft,
 			      idev->cnf.temp_valid_lft + age);
 	cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor;
@@ -1357,7 +1358,6 @@ retry:
 
 	cfg.pfx = &addr;
 	cfg.scope = ipv6_addr_scope(cfg.pfx);
-	cfg.rt_priority = 0;
 
 	ift = ipv6_add_addr(idev, &cfg, block, NULL);
 	if (IS_ERR(ift)) {
-- 
cgit v1.2.3


From 6892286e9c09925780fe2cb6db3585b56b71fe8e Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Mon, 11 Jun 2018 18:00:13 -0700
Subject: tcp: Do not reload skb pointer after skb_gro_receive().

This is not necessary.  skb_gro_receive() will never change what
'head' points to.

In it's original implementation (see commit 71d93b39e52e ("net: Add
skb_gro_receive")), it did:

====================
+	*head = nskb;
+	nskb->next = p->next;
+	p->next = NULL;
====================

This sequence was removed in commit 58025e46ea2d ("net: gro: remove
obsolete code from skb_gro_receive()")

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv4/tcp_offload.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 4d58e2ce0b5b..8cc7c3487330 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -268,8 +268,6 @@ found:
 		goto out_check_final;
 	}
 
-	p = *head;
-	th2 = tcp_hdr(p);
 	tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
 
 out_check_final:
-- 
cgit v1.2.3


From 3fb61eca185cc65a1be23d9a5a11347eef79f597 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 7 Jun 2018 17:56:08 +0200
Subject: netfilter: nft_socket: fix module autoload

Add alias definition for module autoload when adding socket rules.

Fixes: 554ced0a6e29 ("netfilter: nf_tables: add support for native socket matching")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_socket.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index f28a0b944087..74e1b3bd6954 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -142,3 +142,4 @@ module_exit(nft_socket_module_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Máté Eckl");
 MODULE_DESCRIPTION("nf_tables socket match module");
+MODULE_ALIAS_NFT_EXPR("socket");
-- 
cgit v1.2.3


From 215a31f19dedd4e92a67cf5a9717ee898d012b3a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 11 Jun 2018 17:18:29 +0200
Subject: netfilter: nft_dynset: do not reject set updates with NFT_SET_EVAL

NFT_SET_EVAL is signalling the kernel that this sets can be updated from
the evaluation path, even if there are no expressions attached to the
element. Otherwise, set updates with no expressions fail. Update
description to describe the right semantics.

Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 2 +-
 net/netfilter/nft_dynset.c               | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index c9bf74b94f37..89438e68dc03 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -266,7 +266,7 @@ enum nft_rule_compat_attributes {
  * @NFT_SET_INTERVAL: set contains intervals
  * @NFT_SET_MAP: set is used as a dictionary
  * @NFT_SET_TIMEOUT: set uses timeouts
- * @NFT_SET_EVAL: set contains expressions for evaluation
+ * @NFT_SET_EVAL: set can be updated from the evaluation path
  * @NFT_SET_OBJECT: set contains stateful objects
  */
 enum nft_set_flags {
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 4d49529cff61..27d7e4598ab6 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -203,9 +203,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 				goto err1;
 			set->ops->gc_init(set);
 		}
-
-	} else if (set->flags & NFT_SET_EVAL)
-		return -EINVAL;
+	}
 
 	nft_set_ext_prepare(&priv->tmpl);
 	nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen);
-- 
cgit v1.2.3


From 71ad00c50d77e507138c792a9646b53c16f22e11 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 11 Jun 2018 13:20:35 +0200
Subject: netfilter: nf_tables: fix module unload race

We must first remove the nfnetlink protocol handler when nf_tables module
is unloaded -- we don't want userspace to submit new change requests once
we've started to tear down nft state.

Furthermore, nfnetlink must not call any subsystem function after
call_batch returned -EAGAIN.

EAGAIN means the subsys mutex was dropped, so its unlikely but possible that
nf_tables subsystem was removed due to 'rmmod nf_tables' on another cpu.

Therefore, we must abort batch completely and not move on to next part of
the batch.

Last, we can't invoke ->abort unless we've checked that the subsystem is
still registered.

Change netns exit path of nf_tables to make sure any incompleted
transaction gets removed on exit.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 12 +++++++++---
 net/netfilter/nfnetlink.c     | 10 +++++++---
 2 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7979095b69b0..ae312b31db28 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6439,7 +6439,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
 	kfree(trans);
 }
 
-static int nf_tables_abort(struct net *net, struct sk_buff *skb)
+static int __nf_tables_abort(struct net *net)
 {
 	struct nft_trans *trans, *next;
 	struct nft_trans_elem *te;
@@ -6555,6 +6555,11 @@ static void nf_tables_cleanup(struct net *net)
 	nft_validate_state_update(net, NFT_VALIDATE_SKIP);
 }
 
+static int nf_tables_abort(struct net *net, struct sk_buff *skb)
+{
+	return __nf_tables_abort(net);
+}
+
 static bool nf_tables_valid_genid(struct net *net, u32 genid)
 {
 	return net->nft.base_seq == genid;
@@ -7149,9 +7154,10 @@ static int __net_init nf_tables_init_net(struct net *net)
 
 static void __net_exit nf_tables_exit_net(struct net *net)
 {
+	if (!list_empty(&net->nft.commit_list))
+		__nf_tables_abort(net);
 	__nft_release_tables(net);
 	WARN_ON_ONCE(!list_empty(&net->nft.tables));
-	WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
 }
 
 static struct pernet_operations nf_tables_net_ops = {
@@ -7193,9 +7199,9 @@ err1:
 
 static void __exit nf_tables_module_exit(void)
 {
-	unregister_pernet_subsys(&nf_tables_net_ops);
 	nfnetlink_subsys_unregister(&nf_tables_subsys);
 	unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
+	unregister_pernet_subsys(&nf_tables_net_ops);
 	rcu_barrier();
 	nf_tables_core_module_exit();
 	kfree(info);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 4d0da7042aff..e1b6be29848d 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -429,7 +429,7 @@ replay:
 			 */
 			if (err == -EAGAIN) {
 				status |= NFNL_BATCH_REPLAY;
-				goto next;
+				goto done;
 			}
 		}
 ack:
@@ -456,7 +456,7 @@ ack:
 			if (err)
 				status |= NFNL_BATCH_FAILURE;
 		}
-next:
+
 		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
 		if (msglen > skb->len)
 			msglen = skb->len;
@@ -464,7 +464,11 @@ next:
 	}
 done:
 	if (status & NFNL_BATCH_REPLAY) {
-		ss->abort(net, oskb);
+		const struct nfnetlink_subsystem *ss2;
+
+		ss2 = nfnl_dereference_protected(subsys_id);
+		if (ss2 == ss)
+			ss->abort(net, oskb);
 		nfnl_err_reset(&err_list);
 		nfnl_unlock(subsys_id);
 		kfree_skb(skb);
-- 
cgit v1.2.3


From 0a2cf5ee432c2e8718af3553a56a3760d767b736 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 11 Jun 2018 13:20:36 +0200
Subject: netfilter: nf_tables: close race between netns exit and rmmod

If net namespace is exiting while nf_tables module is being removed
we can oops:

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000040
 IP: nf_tables_flowtable_event+0x43/0xf0 [nf_tables]
 PGD 0 P4D 0
 Oops: 0000 [#1] SMP PTI
 Modules linked in: nf_tables(-) nfnetlink [..]
  unregister_netdevice_notifier+0xdd/0x130
  nf_tables_module_exit+0x24/0x3a [nf_tables]
  SyS_delete_module+0x1c5/0x240
  do_syscall_64+0x74/0x190

Avoid this by attempting to take reference on the net namespace from
the notifiers.  If it fails the namespace is exiting already, and nft
core is taking care of cleanup work.

We also need to make sure the netdev hook type gets removed
before netns ops removal, else notifier might be invoked with device
event for a netns where net->nft was never initialised (because
pernet ops was removed beforehand).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c    | 13 ++++++++++---
 net/netfilter/nft_chain_filter.c |  5 +++++
 2 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ae312b31db28..d23a5c269c44 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5837,18 +5837,23 @@ static int nf_tables_flowtable_event(struct notifier_block *this,
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct nft_flowtable *flowtable;
 	struct nft_table *table;
+	struct net *net;
 
 	if (event != NETDEV_UNREGISTER)
 		return 0;
 
+	net = maybe_get_net(dev_net(dev));
+	if (!net)
+		return 0;
+
 	nfnl_lock(NFNL_SUBSYS_NFTABLES);
-	list_for_each_entry(table, &dev_net(dev)->nft.tables, list) {
+	list_for_each_entry(table, &net->nft.tables, list) {
 		list_for_each_entry(flowtable, &table->flowtables, list) {
 			nft_flowtable_event(event, dev, flowtable);
 		}
 	}
 	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
-
+	put_net(net);
 	return NOTIFY_DONE;
 }
 
@@ -7154,9 +7159,11 @@ static int __net_init nf_tables_init_net(struct net *net)
 
 static void __net_exit nf_tables_exit_net(struct net *net)
 {
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
 	if (!list_empty(&net->nft.commit_list))
 		__nf_tables_abort(net);
 	__nft_release_tables(net);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
 	WARN_ON_ONCE(!list_empty(&net->nft.tables));
 }
 
@@ -7201,11 +7208,11 @@ static void __exit nf_tables_module_exit(void)
 {
 	nfnetlink_subsys_unregister(&nf_tables_subsys);
 	unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
+	nft_chain_filter_fini();
 	unregister_pernet_subsys(&nf_tables_net_ops);
 	rcu_barrier();
 	nf_tables_core_module_exit();
 	kfree(info);
-	nft_chain_filter_fini();
 }
 
 module_init(nf_tables_module_init);
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 84c902477a91..d21834bed805 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -318,6 +318,10 @@ static int nf_tables_netdev_event(struct notifier_block *this,
 	    event != NETDEV_CHANGENAME)
 		return NOTIFY_DONE;
 
+	ctx.net = maybe_get_net(ctx.net);
+	if (!ctx.net)
+		return NOTIFY_DONE;
+
 	nfnl_lock(NFNL_SUBSYS_NFTABLES);
 	list_for_each_entry(table, &ctx.net->nft.tables, list) {
 		if (table->family != NFPROTO_NETDEV)
@@ -334,6 +338,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
 		}
 	}
 	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	put_net(ctx.net);
 
 	return NOTIFY_DONE;
 }
-- 
cgit v1.2.3


From adc972c5b88829d38ede08b1069718661c7330ae Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Mon, 11 Jun 2018 22:16:33 +0900
Subject: netfilter: nf_tables: use WARN_ON_ONCE instead of BUG_ON in
 nft_do_chain()

When depth of chain is bigger than NFT_JUMP_STACK_SIZE, the nft_do_chain
crashes. But there is no need to crash hard here.

Suggested-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index deff10adef9c..8de912ca53d3 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -183,7 +183,8 @@ next_rule:
 
 	switch (regs.verdict.code) {
 	case NFT_JUMP:
-		BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
+		if (WARN_ON_ONCE(stackptr >= NFT_JUMP_STACK_SIZE))
+			return NF_DROP;
 		jumpstack[stackptr].chain = chain;
 		jumpstack[stackptr].rules = rules + 1;
 		stackptr++;
-- 
cgit v1.2.3


From c05a45c0865d986a8aea373cd5297dbfded6882e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 11 Jun 2018 22:22:19 +0200
Subject: netfilter: ctnetlink: avoid null pointer dereference

Dan Carpenter points out that deref occurs after NULL check, we should
re-fetch the pointer and check that instead.

Fixes: 2c205dd3981f7 ("netfilter: add struct nf_nat_hook and use it")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 39327a42879f..20a2e37c76d1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1446,7 +1446,8 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
 		}
 		nfnl_lock(NFNL_SUBSYS_CTNETLINK);
 		rcu_read_lock();
-		if (nat_hook->parse_nat_setup)
+		nat_hook = rcu_dereference(nf_nat_hook);
+		if (nat_hook)
 			return -EAGAIN;
 #endif
 		return -EOPNOTSUPP;
-- 
cgit v1.2.3


From fc6ddbecce440df74fb4491c17c372b52cf5be83 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 12 Jun 2018 18:36:19 +0200
Subject: netfilter: xt_connmark: fix list corruption on rmmod

This needs to use xt_unregister_targets, else new revision is left
on the list which then causes list to point to a target struct that has been free'd.

Fixes: 472a73e00757 ("netfilter: xt_conntrack: Support bit-shifting for CONNMARK & MARK targets.")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_connmark.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 94df000abb92..29c38aa7f726 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -211,7 +211,7 @@ static int __init connmark_mt_init(void)
 static void __exit connmark_mt_exit(void)
 {
 	xt_unregister_match(&connmark_mt_reg);
-	xt_unregister_target(connmark_tg_reg);
+	xt_unregister_targets(connmark_tg_reg, ARRAY_SIZE(connmark_tg_reg));
 }
 
 module_init(connmark_mt_init);
-- 
cgit v1.2.3


From 21ba8847f857028dc83a0f341e16ecc616e34740 Mon Sep 17 00:00:00 2001
From: Yi-Hung Wei <yihung.wei@gmail.com>
Date: Tue, 12 Jun 2018 10:51:34 -0700
Subject: netfilter: nf_conncount: Fix garbage collection with zones
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, we use check_hlist() for garbage colleciton. However, we
use the ‘zone’ from the counted entry to query the existence of
existing entries in the hlist. This could be wrong when they are in
different zones, and this patch fixes this issue.

Fixes: e59ea3df3fc2 ("netfilter: xt_connlimit: honor conntrack zone if available")
Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_count.h |  3 ++-
 net/netfilter/nf_conncount.c               | 13 +++++++++----
 net/netfilter/nft_connlimit.c              |  2 +-
 3 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index 1910b6572430..3a188a0923a3 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -20,7 +20,8 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
 				 bool *addit);
 
 bool nf_conncount_add(struct hlist_head *head,
-		      const struct nf_conntrack_tuple *tuple);
+		      const struct nf_conntrack_tuple *tuple,
+		      const struct nf_conntrack_zone *zone);
 
 void nf_conncount_cache_free(struct hlist_head *hhead);
 
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 3b5059a8dcdd..d8383609fe28 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -46,6 +46,7 @@
 struct nf_conncount_tuple {
 	struct hlist_node		node;
 	struct nf_conntrack_tuple	tuple;
+	struct nf_conntrack_zone	zone;
 };
 
 struct nf_conncount_rb {
@@ -80,7 +81,8 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
 }
 
 bool nf_conncount_add(struct hlist_head *head,
-		      const struct nf_conntrack_tuple *tuple)
+		      const struct nf_conntrack_tuple *tuple,
+		      const struct nf_conntrack_zone *zone)
 {
 	struct nf_conncount_tuple *conn;
 
@@ -88,6 +90,7 @@ bool nf_conncount_add(struct hlist_head *head,
 	if (conn == NULL)
 		return false;
 	conn->tuple = *tuple;
+	conn->zone = *zone;
 	hlist_add_head(&conn->node, head);
 	return true;
 }
@@ -108,7 +111,7 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
 
 	/* check the saved connections */
 	hlist_for_each_entry_safe(conn, n, head, node) {
-		found = nf_conntrack_find_get(net, zone, &conn->tuple);
+		found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
 		if (found == NULL) {
 			hlist_del(&conn->node);
 			kmem_cache_free(conncount_conn_cachep, conn);
@@ -117,7 +120,8 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
 
 		found_ct = nf_ct_tuplehash_to_ctrack(found);
 
-		if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple)) {
+		if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) &&
+		    nf_ct_zone_equal(found_ct, zone, zone->dir)) {
 			/*
 			 * Just to be sure we have it only once in the list.
 			 * We should not see tuples twice unless someone hooks
@@ -196,7 +200,7 @@ count_tree(struct net *net, struct rb_root *root,
 			if (!addit)
 				return count;
 
-			if (!nf_conncount_add(&rbconn->hhead, tuple))
+			if (!nf_conncount_add(&rbconn->hhead, tuple, zone))
 				return 0; /* hotdrop */
 
 			return count + 1;
@@ -238,6 +242,7 @@ count_tree(struct net *net, struct rb_root *root,
 	}
 
 	conn->tuple = *tuple;
+	conn->zone = *zone;
 	memcpy(rbconn->key, key, sizeof(u32) * keylen);
 
 	INIT_HLIST_HEAD(&rbconn->hhead);
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index 50c068d660e5..a832c59f0a9c 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -52,7 +52,7 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
 	if (!addit)
 		goto out;
 
-	if (!nf_conncount_add(&priv->hhead, tuple_ptr)) {
+	if (!nf_conncount_add(&priv->hhead, tuple_ptr, zone)) {
 		regs->verdict.code = NF_DROP;
 		spin_unlock_bh(&priv->lock);
 		return;
-- 
cgit v1.2.3


From cdb8744d80352b55c622d049a6c91f449cd291f8 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Tue, 12 Jun 2018 10:05:55 -0700
Subject: Revert "net: do not allow changing SO_REUSEADDR/SO_REUSEPORT on bound
 sockets"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Revert the patch mentioned in the subject because it breaks at least
the Avahi mDNS daemon. That patch namely causes the Ubuntu 18.04 Avahi
daemon to fail to start:

Jun 12 09:49:24 ubuntu-vm avahi-daemon[529]: Successfully called chroot().
Jun 12 09:49:24 ubuntu-vm avahi-daemon[529]: Successfully dropped remaining capabilities.
Jun 12 09:49:24 ubuntu-vm avahi-daemon[529]: No service file found in /etc/avahi/services.
Jun 12 09:49:24 ubuntu-vm avahi-daemon[529]: SO_REUSEADDR failed: Structure needs cleaning
Jun 12 09:49:24 ubuntu-vm avahi-daemon[529]: SO_REUSEADDR failed: Structure needs cleaning
Jun 12 09:49:24 ubuntu-vm avahi-daemon[529]: Failed to create server: No suitable network protocol available
Jun 12 09:49:24 ubuntu-vm avahi-daemon[529]: avahi-daemon 0.7 exiting.
Jun 12 09:49:24 ubuntu-vm systemd[1]: avahi-daemon.service: Main process exited, code=exited, status=255/n/a
Jun 12 09:49:24 ubuntu-vm systemd[1]: avahi-daemon.service: Failed with result 'exit-code'.
Jun 12 09:49:24 ubuntu-vm systemd[1]: Failed to start Avahi mDNS/DNS-SD Stack.

Fixes: f396922d862a ("net: do not allow changing SO_REUSEADDR/SO_REUSEPORT on bound sockets")
Cc: Maciej Żenczykowski <maze@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index f333d75ef1a9..bcc41829a16d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -728,22 +728,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 			sock_valbool_flag(sk, SOCK_DBG, valbool);
 		break;
 	case SO_REUSEADDR:
-		val = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
-		if ((sk->sk_family == PF_INET || sk->sk_family == PF_INET6) &&
-		    inet_sk(sk)->inet_num &&
-		    (sk->sk_reuse != val)) {
-			ret = (sk->sk_state == TCP_ESTABLISHED) ? -EISCONN : -EUCLEAN;
-			break;
-		}
-		sk->sk_reuse = val;
+		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
 		break;
 	case SO_REUSEPORT:
-		if ((sk->sk_family == PF_INET || sk->sk_family == PF_INET6) &&
-		    inet_sk(sk)->inet_num &&
-		    (sk->sk_reuseport != valbool)) {
-			ret = (sk->sk_state == TCP_ESTABLISHED) ? -EISCONN : -EUCLEAN;
-			break;
-		}
 		sk->sk_reuseport = valbool;
 		break;
 	case SO_TYPE:
-- 
cgit v1.2.3


From c0129a0614428e5e4350fa963eecd1fbe19e57e9 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 11 Jun 2018 14:07:14 -0700
Subject: smc: convert to ->poll_mask

smc->clcsock is an internal TCP socket, after TCP socket
converts to ->poll_mask, ->poll doesn't exist any more.
So just convert smc socket to ->poll_mask too.

Fixes: 2c7d3dacebd4 ("net/tcp: convert to ->poll_mask")
Reported-by: syzbot+f5066e369b2d5fff630f@syzkaller.appspotmail.com
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 973b4471b532..da7f02edcd37 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1273,8 +1273,7 @@ static __poll_t smc_accept_poll(struct sock *parent)
 	return mask;
 }
 
-static __poll_t smc_poll(struct file *file, struct socket *sock,
-			     poll_table *wait)
+static __poll_t smc_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	__poll_t mask = 0;
@@ -1290,7 +1289,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
 	if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
 		/* delegate to CLC child sock */
 		release_sock(sk);
-		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
+		mask = smc->clcsock->ops->poll_mask(smc->clcsock, events);
 		lock_sock(sk);
 		sk->sk_err = smc->clcsock->sk->sk_err;
 		if (sk->sk_err) {
@@ -1308,11 +1307,6 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
 			}
 		}
 	} else {
-		if (sk->sk_state != SMC_CLOSED) {
-			release_sock(sk);
-			sock_poll_wait(file, sk_sleep(sk), wait);
-			lock_sock(sk);
-		}
 		if (sk->sk_err)
 			mask |= EPOLLERR;
 		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
@@ -1625,7 +1619,7 @@ static const struct proto_ops smc_sock_ops = {
 	.socketpair	= sock_no_socketpair,
 	.accept		= smc_accept,
 	.getname	= smc_getname,
-	.poll		= smc_poll,
+	.poll_mask	= smc_poll_mask,
 	.ioctl		= smc_ioctl,
 	.listen		= smc_listen,
 	.shutdown	= smc_shutdown,
-- 
cgit v1.2.3


From 995191220056300c51ed870a5d5321f91f3eef89 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 14 Jun 2018 07:37:02 +0800
Subject: sctp: define sctp_packet_gso_append to build GSO frames

Now sctp GSO uses skb_gro_receive() to append the data into head
skb frag_list. However it actually only needs very few code from
skb_gro_receive(). Besides, NAPI_GRO_CB has to be set while most
of its members are not needed here.

This patch is to add sctp_packet_gso_append() to build GSO frames
instead of skb_gro_receive(), and it would avoid many unnecessary
checks and make the code clearer.

Note that sctp will use page frags instead of frag_list to build
GSO frames in another patch. But it may take time, as sctp's GSO
frames may have different size. skb_segment() can only split it
into the frags with the same size, which would break the border
of sctp chunks.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  5 +++++
 net/sctp/output.c          | 28 ++++++++++++++++++----------
 2 files changed, 23 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index ebf809eed33a..dbe1b911a24d 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1133,6 +1133,11 @@ struct sctp_input_cb {
 };
 #define SCTP_INPUT_CB(__skb)	((struct sctp_input_cb *)&((__skb)->cb[0]))
 
+struct sctp_output_cb {
+	struct sk_buff *last;
+};
+#define SCTP_OUTPUT_CB(__skb)	((struct sctp_output_cb *)&((__skb)->cb[0]))
+
 static inline const struct sk_buff *sctp_gso_headskb(const struct sk_buff *skb)
 {
 	const struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index e672dee302c7..7f849b01ec8e 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -409,6 +409,21 @@ static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
 	refcount_inc(&sk->sk_wmem_alloc);
 }
 
+static void sctp_packet_gso_append(struct sk_buff *head, struct sk_buff *skb)
+{
+	if (SCTP_OUTPUT_CB(head)->last == head)
+		skb_shinfo(head)->frag_list = skb;
+	else
+		SCTP_OUTPUT_CB(head)->last->next = skb;
+	SCTP_OUTPUT_CB(head)->last = skb;
+
+	head->truesize += skb->truesize;
+	head->data_len += skb->len;
+	head->len += skb->len;
+
+	__skb_header_release(skb);
+}
+
 static int sctp_packet_pack(struct sctp_packet *packet,
 			    struct sk_buff *head, int gso, gfp_t gfp)
 {
@@ -422,7 +437,7 @@ static int sctp_packet_pack(struct sctp_packet *packet,
 
 	if (gso) {
 		skb_shinfo(head)->gso_type = sk->sk_gso_type;
-		NAPI_GRO_CB(head)->last = head;
+		SCTP_OUTPUT_CB(head)->last = head;
 	} else {
 		nskb = head;
 		pkt_size = packet->size;
@@ -503,15 +518,8 @@ merge:
 					 &packet->chunk_list);
 		}
 
-		if (gso) {
-			if (skb_gro_receive(&head, nskb)) {
-				kfree_skb(nskb);
-				return 0;
-			}
-			if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >=
-					 sk->sk_gso_max_segs))
-				return 0;
-		}
+		if (gso)
+			sctp_packet_gso_append(head, nskb);
 
 		pkt_count++;
 	} while (!list_empty(&packet->chunk_list));
-- 
cgit v1.2.3


From 4fd44a98ffe0d048246efef67ed640fdf2098a62 Mon Sep 17 00:00:00 2001
From: Frank van der Linden <fllinden@amazon.com>
Date: Tue, 12 Jun 2018 23:09:37 +0000
Subject: tcp: verify the checksum of the first data segment in a new
 connection

commit 079096f103fa ("tcp/dccp: install syn_recv requests into ehash
table") introduced an optimization for the handling of child sockets
created for a new TCP connection.

But this optimization passes any data associated with the last ACK of the
connection handshake up the stack without verifying its checksum, because it
calls tcp_child_process(), which in turn calls tcp_rcv_state_process()
directly.  These lower-level processing functions do not do any checksum
verification.

Insert a tcp_checksum_complete call in the TCP_NEW_SYN_RECEIVE path to
fix this.

Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table")
Signed-off-by: Frank van der Linden <fllinden@amazon.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Tested-by: Balbir Singh <bsingharora@gmail.com>
Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 4 ++++
 net/ipv6/tcp_ipv6.c | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fed3f1c66167..bea17f1e8302 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1730,6 +1730,10 @@ process:
 			reqsk_put(req);
 			goto discard_it;
 		}
+		if (tcp_checksum_complete(skb)) {
+			reqsk_put(req);
+			goto csum_error;
+		}
 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
 			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b620d9b72e59..7efa9fd7e109 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1479,6 +1479,10 @@ process:
 			reqsk_put(req);
 			goto discard_it;
 		}
+		if (tcp_checksum_complete(skb)) {
+			reqsk_put(req);
+			goto csum_error;
+		}
 		if (unlikely(sk->sk_state != TCP_LISTEN)) {
 			inet_csk_reqsk_queue_drop_and_put(sk, req);
 			goto lookup;
-- 
cgit v1.2.3


From 90904ff5f958a215cc3d26f957a46e80fa178470 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Wed, 13 Jun 2018 15:09:18 +0200
Subject: l2tp: fix pseudo-wire type for sessions created by pppol2tp_connect()

Define cfg.pw_type so that the new session is created with its .pwtype
field properly set (L2TP_PWTYPE_PPP).

Not setting the pseudo-wire type had several annoying effects:

  * Invalid value returned in the L2TP_ATTR_PW_TYPE attribute when
    dumping sessions with the netlink API.

  * Impossibility to delete the session using the netlink API (because
    l2tp_nl_cmd_session_delete() gets the deletion callback function
    from an array indexed by the session's pseudo-wire type).

Also, there are several cases where we should check a session's
pseudo-wire type. For example, pppol2tp_connect() should refuse to
connect a session that is not PPPoL2TP, but that requires the session's
.pwtype field to be properly set.

Fixes: f7faffa3ff8e ("l2tp: Add L2TPv3 protocol support")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ppp.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index b56cb1df4fc0..270a0a999eaf 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -751,6 +751,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 		/* Default MTU must allow space for UDP/L2TP/PPP headers */
 		cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
 		cfg.mru = cfg.mtu;
+		cfg.pw_type = L2TP_PWTYPE_PPP;
 
 		session = l2tp_session_create(sizeof(struct pppol2tp_session),
 					      tunnel, session_id,
-- 
cgit v1.2.3


From 7ac6ab1f8a38ba7f8d97f95475bb6a2575db4658 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Wed, 13 Jun 2018 15:09:19 +0200
Subject: l2tp: only accept PPP sessions in pppol2tp_connect()

l2tp_session_priv() returns a struct pppol2tp_session pointer only for
PPPoL2TP sessions. In particular, if the session is an L2TP_PWTYPE_ETH
pseudo-wire, l2tp_session_priv() returns a pointer to an l2tp_eth_sess
structure, which is much smaller than struct pppol2tp_session. This
leads to invalid memory dereference when trying to lock ps->sk_lock.

Fixes: d9e31d17ceba ("l2tp: Add L2TP ethernet pseudowire support")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ppp.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 270a0a999eaf..8b3b6947a07d 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -734,6 +734,12 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	session = l2tp_session_get(sock_net(sk), tunnel, session_id);
 	if (session) {
 		drop_refcnt = true;
+
+		if (session->pwtype != L2TP_PWTYPE_PPP) {
+			error = -EPROTOTYPE;
+			goto end;
+		}
+
 		ps = l2tp_session_priv(session);
 
 		/* Using a pre-existing session is fine as long as it hasn't
-- 
cgit v1.2.3


From 3e1bc8bf974e2d4e7beb842a4c801c2542eff3bd Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Wed, 13 Jun 2018 15:09:20 +0200
Subject: l2tp: prevent pppol2tp_connect() from creating kernel sockets

If 'fd' is negative, l2tp_tunnel_create() creates a tunnel socket using
the configuration passed in 'tcfg'. Currently, pppol2tp_connect() sets
the relevant fields to zero, tricking l2tp_tunnel_create() into setting
up an unusable kernel socket.

We can't set 'tcfg' with the required fields because there's no way to
get them from the current connect() parameters. So let's restrict
kernel sockets creation to the netlink API, which is the original use
case.

Fixes: 789a4a2c61d8 ("l2tp: Add support for static unmanaged L2TPv3 tunnels")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ppp.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 8b3b6947a07d..1b24f76ae210 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -701,6 +701,15 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 				.encap = L2TP_ENCAPTYPE_UDP,
 				.debug = 0,
 			};
+
+			/* Prevent l2tp_tunnel_register() from trying to set up
+			 * a kernel socket.
+			 */
+			if (fd < 0) {
+				error = -EBADF;
+				goto end;
+			}
+
 			error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, &tcfg, &tunnel);
 			if (error < 0)
 				goto end;
-- 
cgit v1.2.3


From bda06be2158c7aa7e41b15500c4d3840369c19a6 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Wed, 13 Jun 2018 15:09:21 +0200
Subject: l2tp: clean up stale tunnel or session in pppol2tp_connect's error
 path

pppol2tp_connect() may create a tunnel or a session. Remove them in
case of error.

Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ppp.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 1b24f76ae210..f429fed06a1e 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -612,6 +612,8 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	u32 session_id, peer_session_id;
 	bool drop_refcnt = false;
 	bool drop_tunnel = false;
+	bool new_session = false;
+	bool new_tunnel = false;
 	int ver = 2;
 	int fd;
 
@@ -722,6 +724,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 				goto end;
 			}
 			drop_tunnel = true;
+			new_tunnel = true;
 		}
 	} else {
 		/* Error if we can't find the tunnel */
@@ -788,6 +791,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 			goto end;
 		}
 		drop_refcnt = true;
+		new_session = true;
 	}
 
 	/* Special case: if source & dest session_id == 0x0000, this
@@ -834,6 +838,12 @@ out_no_ppp:
 		  session->name);
 
 end:
+	if (error) {
+		if (new_session)
+			l2tp_session_delete(session);
+		if (new_tunnel)
+			l2tp_tunnel_delete(tunnel);
+	}
 	if (drop_refcnt)
 		l2tp_session_dec_refcount(session);
 	if (drop_tunnel)
-- 
cgit v1.2.3


From f1693c63ab133d16994cc50f773982b5905af264 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Date: Thu, 14 Jun 2018 11:52:34 -0700
Subject: rds: avoid unenecessary cong_update in loop transport

Loop transport which is self loopback, remote port congestion
update isn't relevant. Infact the xmit path already ignores it.
Receive path needs to do the same.

Reported-by: syzbot+4c20b3866171ce8441d2@syzkaller.appspotmail.com
Reviewed-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/loop.c | 1 +
 net/rds/rds.h  | 5 +++++
 net/rds/recv.c | 5 +++++
 3 files changed, 11 insertions(+)

(limited to 'net')

diff --git a/net/rds/loop.c b/net/rds/loop.c
index f2bf78de5688..dac6218a460e 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -193,4 +193,5 @@ struct rds_transport rds_loop_transport = {
 	.inc_copy_to_user	= rds_message_inc_copy_to_user,
 	.inc_free		= rds_loop_inc_free,
 	.t_name			= "loopback",
+	.t_type			= RDS_TRANS_LOOP,
 };
diff --git a/net/rds/rds.h b/net/rds/rds.h
index b04c333d9d1c..f2272fb8cd45 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -479,6 +479,11 @@ struct rds_notifier {
 	int			n_status;
 };
 
+/* Available as part of RDS core, so doesn't need to participate
+ * in get_preferred transport etc
+ */
+#define	RDS_TRANS_LOOP	3
+
 /**
  * struct rds_transport -  transport specific behavioural hooks
  *
diff --git a/net/rds/recv.c b/net/rds/recv.c
index dc67458b52f0..192ac6f78ded 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -103,6 +103,11 @@ static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk,
 		rds_stats_add(s_recv_bytes_added_to_socket, delta);
 	else
 		rds_stats_add(s_recv_bytes_removed_from_socket, -delta);
+
+	/* loop transport doesn't send/recv congestion updates */
+	if (rs->rs_transport->t_type == RDS_TRANS_LOOP)
+		return;
+
 	now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs);
 
 	rdsdebug("rs %p (%pI4:%u) recv bytes %d buf %d "
-- 
cgit v1.2.3


From 3c12d0486856b9eb89c2a9ac336713cba90813e3 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Wed, 6 Jun 2018 10:53:55 +0200
Subject: cfg80211: initialize sinfo in cfg80211_get_station

Most of the implementations behind cfg80211_get_station will not initialize
sinfo to zero before manipulating it. For example, the member "filled",
which indicates the filled in parts of this struct, is often only modified
by enabling certain bits in the bitfield while keeping the remaining bits
in their original state. A caller without a preinitialized sinfo.filled can
then no longer decide which parts of sinfo were filled in by
cfg80211_get_station (or actually the underlying implementations).

cfg80211_get_station must therefore take care that sinfo is initialized to
zero. Otherwise, the caller may tries to read information which was not
filled in and which must therefore also be considered uninitialized. In
batadv_v_elp_get_throughput's case, an invalid "random" expected throughput
may be stored for this neighbor and thus the B.A.T.M.A.N V algorithm may
switch to non-optimal neighbors for certain destinations.

Fixes: 7406353d43c8 ("cfg80211: implement cfg80211_get_station cfg80211 API")
Reported-by: Thomas Lauer <holminateur@gmail.com>
Reported-by: Marcel Schmidt <ff.z-casparistrasse@mailbox.org>
Cc: b.a.t.m.a.n@lists.open-mesh.org
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 net/wireless/util.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/wireless/util.c b/net/wireless/util.c
index b5bb1c309914..3c654cd7ba56 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1746,6 +1746,8 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
 	if (!rdev->ops->get_station)
 		return -EOPNOTSUPP;
 
+	memset(sinfo, 0, sizeof(*sinfo));
+
 	return rdev_get_station(rdev, dev, mac_addr, sinfo);
 }
 EXPORT_SYMBOL(cfg80211_get_station);
-- 
cgit v1.2.3


From dc8b274f0952f604d72b10698cde6887321a669f Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@toke.dk>
Date: Fri, 25 May 2018 14:29:21 +0200
Subject: mac80211: Move up init of TXQs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On init, ieee80211_if_add() dumps the interface. Since that now includes a
dump of the TXQ state, we need to initialise that before the dump happens.
So move up the TXQ initialisation to to before the call to
ieee80211_if_add().

Fixes: 52539ca89f36 ("cfg80211: Expose TXQ stats and parameters to userspace")
Reported-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
Tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 net/mac80211/main.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 4d2e797e3f16..722f3d9fb416 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1098,6 +1098,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 
 	ieee80211_led_init(local);
 
+	result = ieee80211_txq_setup_flows(local);
+	if (result)
+		goto fail_flows;
+
 	rtnl_lock();
 
 	result = ieee80211_init_rate_ctrl_alg(local,
@@ -1120,10 +1124,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 
 	rtnl_unlock();
 
-	result = ieee80211_txq_setup_flows(local);
-	if (result)
-		goto fail_flows;
-
 #ifdef CONFIG_INET
 	local->ifa_notifier.notifier_call = ieee80211_ifa_changed;
 	result = register_inetaddr_notifier(&local->ifa_notifier);
@@ -1149,8 +1149,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 #if defined(CONFIG_INET) || defined(CONFIG_IPV6)
  fail_ifa:
 #endif
-	ieee80211_txq_teardown_flows(local);
- fail_flows:
 	rtnl_lock();
 	rate_control_deinitialize(local);
 	ieee80211_remove_interfaces(local);
@@ -1158,6 +1156,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	rtnl_unlock();
 	ieee80211_led_exit(local);
 	ieee80211_wep_free(local);
+	ieee80211_txq_teardown_flows(local);
+ fail_flows:
 	destroy_workqueue(local->workqueue);
  fail_workqueue:
 	wiphy_unregister(local->hw.wiphy);
-- 
cgit v1.2.3


From bf2b61a6838f19cbc33f6732715012c483fa3795 Mon Sep 17 00:00:00 2001
From: Dedy Lansky <dlansky@codeaurora.org>
Date: Fri, 15 Jun 2018 13:05:01 +0200
Subject: cfg80211: fix rcu in cfg80211_unregister_wdev

Callers of cfg80211_unregister_wdev can free the wdev object
immediately after this function returns. This may crash the kernel
because this wdev object is still in use by other threads.
Add synchronize_rcu() after list_del_rcu to make sure wdev object can
be safely freed.

Signed-off-by: Dedy Lansky <dlansky@codeaurora.org>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
---
 net/wireless/core.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 5fe35aafdd9c..48e8097339ab 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1012,6 +1012,7 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev)
 	nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
 
 	list_del_rcu(&wdev->list);
+	synchronize_rcu();
 	rdev->devlist_generation++;
 
 	switch (wdev->iftype) {
-- 
cgit v1.2.3


From 6eba08c3626bca42b3cb0c9d43ac37ab11b4be1a Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 15 Jun 2018 16:23:35 +0300
Subject: ipv6: Only emit append events for appended routes

Current code will emit an append event in the FIB notification chain for
any route added with NLM_F_APPEND set, even if the route was not
appended to any existing route.

This is inconsistent with IPv4 where such an event is only emitted when
the new route is appended after an existing one.

Align IPv6 behavior with IPv4, thereby allowing listeners to more easily
handle these events.

Fixes: f34436a43092 ("net/ipv6: Simplify route replace and appending into multipath route")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 7aa4c41a3bd9..39d1d487eca2 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -934,6 +934,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 {
 	struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
 				    lockdep_is_held(&rt->fib6_table->tb6_lock));
+	enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
 	struct fib6_info *iter = NULL, *match = NULL;
 	struct fib6_info __rcu **ins;
 	int replace = (info->nlh &&
@@ -1013,6 +1014,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 				       "Can not append to a REJECT route");
 			return -EINVAL;
 		}
+		event = FIB_EVENT_ENTRY_APPEND;
 		rt->fib6_nsiblings = match->fib6_nsiblings;
 		list_add_tail(&rt->fib6_siblings, &match->fib6_siblings);
 		match->fib6_nsiblings++;
@@ -1034,15 +1036,12 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 	 *	insert node
 	 */
 	if (!replace) {
-		enum fib_event_type event;
-
 		if (!add)
 			pr_warn("NLM_F_CREATE should be set when creating new route\n");
 
 add:
 		nlflags |= NLM_F_CREATE;
 
-		event = append ? FIB_EVENT_ENTRY_APPEND : FIB_EVENT_ENTRY_ADD;
 		err = call_fib6_entry_notifiers(info->nl_net, event, rt,
 						extack);
 		if (err)
-- 
cgit v1.2.3


From de9bada5d389903f4faf33980e6a95a2911c7e6d Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 15 Jun 2018 15:39:17 +0200
Subject: l2tp: reject creation of non-PPP sessions on L2TPv2 tunnels

The /proc/net/pppol2tp handlers (pppol2tp_seq_*()) iterate over all
L2TPv2 tunnels, and rightfully expect that only PPP sessions can be
found there. However, l2tp_netlink accepts creating Ethernet sessions
regardless of the underlying tunnel version.

This confuses pppol2tp_seq_session_show(), which expects that
l2tp_session_priv() returns a pppol2tp_session structure. When the
session is an Ethernet pseudo-wire, a struct l2tp_eth_sess is returned
instead. This leads to invalid memory access when
pppol2tp_session_get_sock() later tries to dereference ps->sk.

Fixes: d9e31d17ceba ("l2tp: Add L2TP ethernet pseudowire support")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_netlink.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 6616c9fd292f..5b9900889e31 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -553,6 +553,12 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 		goto out_tunnel;
 	}
 
+	/* L2TPv2 only accepts PPP pseudo-wires */
+	if (tunnel->version == 2 && cfg.pw_type != L2TP_PWTYPE_PPP) {
+		ret = -EPROTONOSUPPORT;
+		goto out_tunnel;
+	}
+
 	if (tunnel->version > 2) {
 		if (info->attrs[L2TP_ATTR_DATA_SEQ])
 			cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
-- 
cgit v1.2.3


From ecd012e45ab5fd76ed57546865897ce35920f56b Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 15 Jun 2018 15:39:19 +0200
Subject: l2tp: filter out non-PPP sessions in pppol2tp_tunnel_ioctl()

pppol2tp_tunnel_ioctl() can act on an L2TPv3 tunnel, in which case
'session' may be an Ethernet pseudo-wire.

However, pppol2tp_session_ioctl() expects a PPP pseudo-wire, as it
assumes l2tp_session_priv() points to a pppol2tp_session structure. For
an Ethernet pseudo-wire l2tp_session_priv() points to an l2tp_eth_sess
structure instead, making pppol2tp_session_ioctl() access invalid
memory.

Fixes: d9e31d17ceba ("l2tp: Add L2TP ethernet pseudowire support")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ppp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index f429fed06a1e..55188382845c 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1201,7 +1201,7 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
 				l2tp_session_get(sock_net(sk), tunnel,
 						 stats.session_id);
 
-			if (session) {
+			if (session && session->pwtype == L2TP_PWTYPE_PPP) {
 				err = pppol2tp_session_ioctl(session, cmd,
 							     arg);
 				l2tp_session_dec_refcount(session);
-- 
cgit v1.2.3


From a447da7d00410278c90d3576782a43f8b675d7be Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 15 Jun 2018 03:07:45 +0200
Subject: tls: fix use-after-free in tls_push_record

syzkaller managed to trigger a use-after-free in tls like the
following:

  BUG: KASAN: use-after-free in tls_push_record.constprop.15+0x6a2/0x810 [tls]
  Write of size 1 at addr ffff88037aa08000 by task a.out/2317

  CPU: 3 PID: 2317 Comm: a.out Not tainted 4.17.0+ #144
  Hardware name: LENOVO 20FBCTO1WW/20FBCTO1WW, BIOS N1FET47W (1.21 ) 11/28/2016
  Call Trace:
   dump_stack+0x71/0xab
   print_address_description+0x6a/0x280
   kasan_report+0x258/0x380
   ? tls_push_record.constprop.15+0x6a2/0x810 [tls]
   tls_push_record.constprop.15+0x6a2/0x810 [tls]
   tls_sw_push_pending_record+0x2e/0x40 [tls]
   tls_sk_proto_close+0x3fe/0x710 [tls]
   ? tcp_check_oom+0x4c0/0x4c0
   ? tls_write_space+0x260/0x260 [tls]
   ? kmem_cache_free+0x88/0x1f0
   inet_release+0xd6/0x1b0
   __sock_release+0xc0/0x240
   sock_close+0x11/0x20
   __fput+0x22d/0x660
   task_work_run+0x114/0x1a0
   do_exit+0x71a/0x2780
   ? mm_update_next_owner+0x650/0x650
   ? handle_mm_fault+0x2f5/0x5f0
   ? __do_page_fault+0x44f/0xa50
   ? mm_fault_error+0x2d0/0x2d0
   do_group_exit+0xde/0x300
   __x64_sys_exit_group+0x3a/0x50
   do_syscall_64+0x9a/0x300
   ? page_fault+0x8/0x30
   entry_SYSCALL_64_after_hwframe+0x44/0xa9

This happened through fault injection where aead_req allocation in
tls_do_encryption() eventually failed and we returned -ENOMEM from
the function. Turns out that the use-after-free is triggered from
tls_sw_sendmsg() in the second tls_push_record(). The error then
triggers a jump to waiting for memory in sk_stream_wait_memory()
resp. returning immediately in case of MSG_DONTWAIT. What follows is
the trim_both_sgl(sk, orig_size), which drops elements from the sg
list added via tls_sw_sendmsg(). Now the use-after-free gets triggered
when the socket is being closed, where tls_sk_proto_close() callback
is invoked. The tls_complete_pending_work() will figure that there's
a pending closed tls record to be flushed and thus calls into the
tls_push_pending_closed_record() from there. ctx->push_pending_record()
is called from the latter, which is the tls_sw_push_pending_record()
from sw path. This again calls into tls_push_record(). And here the
tls_fill_prepend() will panic since the buffer address has been freed
earlier via trim_both_sgl(). One way to fix it is to move the aead
request allocation out of tls_do_encryption() early into tls_push_record().
This means we don't prep the tls header and advance state to the
TLS_PENDING_CLOSED_RECORD before allocation which could potentially
fail happened. That fixes the issue on my side.

Fixes: 3c4d7559159b ("tls: kernel TLS support")
Reported-by: syzbot+5c74af81c547738e1684@syzkaller.appspotmail.com
Reported-by: syzbot+709f2810a6a05f11d4d3@syzkaller.appspotmail.com
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tls/tls_sw.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 34895b7c132d..2945a3bd538c 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -191,18 +191,12 @@ static void tls_free_both_sg(struct sock *sk)
 }
 
 static int tls_do_encryption(struct tls_context *tls_ctx,
-			     struct tls_sw_context_tx *ctx, size_t data_len,
-			     gfp_t flags)
+			     struct tls_sw_context_tx *ctx,
+			     struct aead_request *aead_req,
+			     size_t data_len)
 {
-	unsigned int req_size = sizeof(struct aead_request) +
-		crypto_aead_reqsize(ctx->aead_send);
-	struct aead_request *aead_req;
 	int rc;
 
-	aead_req = kzalloc(req_size, flags);
-	if (!aead_req)
-		return -ENOMEM;
-
 	ctx->sg_encrypted_data[0].offset += tls_ctx->tx.prepend_size;
 	ctx->sg_encrypted_data[0].length -= tls_ctx->tx.prepend_size;
 
@@ -219,7 +213,6 @@ static int tls_do_encryption(struct tls_context *tls_ctx,
 	ctx->sg_encrypted_data[0].offset -= tls_ctx->tx.prepend_size;
 	ctx->sg_encrypted_data[0].length += tls_ctx->tx.prepend_size;
 
-	kfree(aead_req);
 	return rc;
 }
 
@@ -228,8 +221,14 @@ static int tls_push_record(struct sock *sk, int flags,
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct aead_request *req;
 	int rc;
 
+	req = kzalloc(sizeof(struct aead_request) +
+		      crypto_aead_reqsize(ctx->aead_send), sk->sk_allocation);
+	if (!req)
+		return -ENOMEM;
+
 	sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1);
 	sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1);
 
@@ -245,15 +244,14 @@ static int tls_push_record(struct sock *sk, int flags,
 	tls_ctx->pending_open_record_frags = 0;
 	set_bit(TLS_PENDING_CLOSED_RECORD, &tls_ctx->flags);
 
-	rc = tls_do_encryption(tls_ctx, ctx, ctx->sg_plaintext_size,
-			       sk->sk_allocation);
+	rc = tls_do_encryption(tls_ctx, ctx, req, ctx->sg_plaintext_size);
 	if (rc < 0) {
 		/* If we are called from write_space and
 		 * we fail, we need to set this SOCK_NOSPACE
 		 * to trigger another write_space in the future.
 		 */
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-		return rc;
+		goto out_req;
 	}
 
 	free_sg(sk, ctx->sg_plaintext_data, &ctx->sg_plaintext_num_elem,
@@ -268,6 +266,8 @@ static int tls_push_record(struct sock *sk, int flags,
 		tls_err_abort(sk, EBADMSG);
 
 	tls_advance_record_sn(sk, &tls_ctx->tx);
+out_req:
+	kfree(req);
 	return rc;
 }
 
-- 
cgit v1.2.3


From 06030dbaf3b6c5801dcdb7fe4fbab3b91c8da84a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 15 Jun 2018 03:07:46 +0200
Subject: tls: fix waitall behavior in tls_sw_recvmsg

Current behavior in tls_sw_recvmsg() is to wait for incoming tls
messages and copy up to exactly len bytes of data that the user
provided. This is problematic in the sense that i) if no packet
is currently queued in strparser we keep waiting until one has been
processed and pushed into tls receive layer for tls_wait_data() to
wake up and push the decrypted bits to user space. Given after
tls decryption, we're back at streaming data, use sock_rcvlowat()
hint from tcp socket instead. Retain current behavior with MSG_WAITALL
flag and otherwise use the hint target for breaking the loop and
returning to application. This is done if currently no ctx->recv_pkt
is ready, otherwise continue to process it from our strparser
backlog.

Fixes: c46234ebb4d1 ("tls: RX path for ktls")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tls/tls_sw.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 2945a3bd538c..f127fac88acf 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -754,7 +754,7 @@ int tls_sw_recvmsg(struct sock *sk,
 	struct sk_buff *skb;
 	ssize_t copied = 0;
 	bool cmsg = false;
-	int err = 0;
+	int target, err = 0;
 	long timeo;
 
 	flags |= nonblock;
@@ -764,6 +764,7 @@ int tls_sw_recvmsg(struct sock *sk,
 
 	lock_sock(sk);
 
+	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 	do {
 		bool zc = false;
@@ -856,6 +857,9 @@ fallback_to_reg_recv:
 					goto recv_end;
 			}
 		}
+		/* If we have a new message from strparser, continue now. */
+		if (copied >= target && !ctx->recv_pkt)
+			break;
 	} while (len);
 
 recv_end:
-- 
cgit v1.2.3


From f6a6f203d507aae3a06a8de79c6f0ecc4658b81c Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Tue, 12 Jun 2018 21:26:10 -0700
Subject: neighbour: skip NTF_EXT_LEARNED entries during forced gc

Commit 9ce33e46531d ("neighbour: support for NTF_EXT_LEARNED flag")
added support for NTF_EXT_LEARNED for neighbour entries.
NTF_EXT_LEARNED entries are neigh entries managed by control
plane (eg: Ethernet VPN implementation in FRR routing suite).
Periodic gc already excludes these entries. This patch extends
it to forced gc which the earlier patch missed.

Fixes: 9ce33e46531d ("neighbour: support for NTF_EXT_LEARNED flag")
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a7a9c3d738ba..8e3fda9e725c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -119,13 +119,14 @@ unsigned long neigh_rand_reach_time(unsigned long base)
 EXPORT_SYMBOL(neigh_rand_reach_time);
 
 
-static bool neigh_del(struct neighbour *n, __u8 state,
+static bool neigh_del(struct neighbour *n, __u8 state, __u8 flags,
 		      struct neighbour __rcu **np, struct neigh_table *tbl)
 {
 	bool retval = false;
 
 	write_lock(&n->lock);
-	if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state)) {
+	if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state) &&
+	    !(n->flags & flags)) {
 		struct neighbour *neigh;
 
 		neigh = rcu_dereference_protected(n->next,
@@ -157,7 +158,7 @@ bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
 	while ((n = rcu_dereference_protected(*np,
 					      lockdep_is_held(&tbl->lock)))) {
 		if (n == ndel)
-			return neigh_del(n, 0, np, tbl);
+			return neigh_del(n, 0, 0, np, tbl);
 		np = &n->next;
 	}
 	return false;
@@ -185,7 +186,8 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 			 * - nobody refers to it.
 			 * - it is not permanent
 			 */
-			if (neigh_del(n, NUD_PERMANENT, np, tbl)) {
+			if (neigh_del(n, NUD_PERMANENT, NTF_EXT_LEARNED, np,
+				      tbl)) {
 				shrunk = 1;
 				continue;
 			}
-- 
cgit v1.2.3