From 902c0b1887522a099aa4e1e6b4b476c2fe5dd13e Mon Sep 17 00:00:00 2001 From: Juhee Kang Date: Mon, 4 Oct 2021 21:14:38 +0900 Subject: netfilter: xt_IDLETIMER: fix panic that occurs when timer_type has garbage value Currently, when the rule related to IDLETIMER is added, idletimer_tg timer structure is initialized by kmalloc on executing idletimer_tg_create function. However, in this process timer->timer_type is not defined to a specific value. Thus, timer->timer_type has garbage value and it occurs kernel panic. So, this commit fixes the panic by initializing timer->timer_type using kzalloc instead of kmalloc. Test commands: # iptables -A OUTPUT -j IDLETIMER --timeout 1 --label test $ cat /sys/class/xt_idletimer/timers/test Killed Splat looks like: BUG: KASAN: user-memory-access in alarm_expires_remaining+0x49/0x70 Read of size 8 at addr 0000002e8c7bc4c8 by task cat/917 CPU: 12 PID: 917 Comm: cat Not tainted 5.14.0+ #3 79940a339f71eb14fc81aee1757a20d5bf13eb0e Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Call Trace: dump_stack_lvl+0x6e/0x9c kasan_report.cold+0x112/0x117 ? alarm_expires_remaining+0x49/0x70 __asan_load8+0x86/0xb0 alarm_expires_remaining+0x49/0x70 idletimer_tg_show+0xe5/0x19b [xt_IDLETIMER 11219304af9316a21bee5ba9d58f76a6b9bccc6d] dev_attr_show+0x3c/0x60 sysfs_kf_seq_show+0x11d/0x1f0 ? device_remove_bin_file+0x20/0x20 kernfs_seq_show+0xa4/0xb0 seq_read_iter+0x29c/0x750 kernfs_fop_read_iter+0x25a/0x2c0 ? __fsnotify_parent+0x3d1/0x570 ? iov_iter_init+0x70/0x90 new_sync_read+0x2a7/0x3d0 ? __x64_sys_llseek+0x230/0x230 ? rw_verify_area+0x81/0x150 vfs_read+0x17b/0x240 ksys_read+0xd9/0x180 ? vfs_write+0x460/0x460 ? do_syscall_64+0x16/0xc0 ? lockdep_hardirqs_on+0x79/0x120 __x64_sys_read+0x43/0x50 do_syscall_64+0x3b/0xc0 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f0cdc819142 Code: c0 e9 c2 fe ff ff 50 48 8d 3d 3a ca 0a 00 e8 f5 19 02 00 0f 1f 44 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 ec 28 48 89 54 24 RSP: 002b:00007fff28eee5b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f0cdc819142 RDX: 0000000000020000 RSI: 00007f0cdc032000 RDI: 0000000000000003 RBP: 00007f0cdc032000 R08: 00007f0cdc031010 R09: 0000000000000000 R10: 0000000000000022 R11: 0000000000000246 R12: 00005607e9ee31f0 R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000 Fixes: 68983a354a65 ("netfilter: xtables: Add snapshot of hardidletimer target") Signed-off-by: Juhee Kang Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_IDLETIMER.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 7b2f359bfce4..2f7cf5ecebf4 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -137,7 +137,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) { int ret; - info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL); + info->timer = kzalloc(sizeof(*info->timer), GFP_KERNEL); if (!info->timer) { ret = -ENOMEM; goto out; -- cgit v1.2.3 From 77076934afdcd46516caf18ed88b2f88025c9ddb Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 5 Oct 2021 22:54:54 +0200 Subject: netfilter: Kconfig: use 'default y' instead of 'm' for bool config option This option, NF_CONNTRACK_SECMARK, is a bool, so it can never be 'm'. Fixes: 33b8e77605620 ("[NETFILTER]: Add CONFIG_NETFILTER_ADVANCED option") Signed-off-by: Vegard Nossum Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 54395266339d..92a747896f80 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -109,7 +109,7 @@ config NF_CONNTRACK_MARK config NF_CONNTRACK_SECMARK bool 'Connection tracking security mark support' depends on NETWORK_SECMARK - default m if NETFILTER_ADVANCED=n + default y if NETFILTER_ADVANCED=n help This option enables security markings to be applied to connections. Typically they are copied to connections from -- cgit v1.2.3 From 68a3765c659f809dcaac20030853a054646eb739 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Oct 2021 16:20:34 +0200 Subject: netfilter: nf_tables: skip netdev events generated on netns removal syzbot reported following (harmless) WARN: WARNING: CPU: 1 PID: 2648 at net/netfilter/core.c:468 nft_netdev_unregister_hooks net/netfilter/nf_tables_api.c:230 [inline] nf_tables_unregister_hook include/net/netfilter/nf_tables.h:1090 [inline] __nft_release_basechain+0x138/0x640 net/netfilter/nf_tables_api.c:9524 nft_netdev_event net/netfilter/nft_chain_filter.c:351 [inline] nf_tables_netdev_event+0x521/0x8a0 net/netfilter/nft_chain_filter.c:382 reproducer: unshare -n bash -c 'ip link add br0 type bridge; nft add table netdev t ; \ nft add chain netdev t ingress \{ type filter hook ingress device "br0" \ priority 0\; policy drop\; \}' Problem is that when netns device exit hooks create the UNREGISTER event, the .pre_exit hook for nf_tables core has already removed the base hook. Notifier attempts to do this again. The need to do base hook unregister unconditionally was needed in the past, because notifier was last stage where reg->dev dereference was safe. Now that nf_tables does the hook removal in .pre_exit, this isn't needed anymore. Reported-and-tested-by: syzbot+154bd5be532a63aa778b@syzkaller.appspotmail.com Fixes: 767d1216bff825 ("netfilter: nftables: fix possible UAF over chains from packet path in netns") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_chain_filter.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 5b02408a920b..3ced0eb6b7c3 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -342,12 +342,6 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev, return; } - /* UNREGISTER events are also happening on netns exit. - * - * Although nf_tables core releases all tables/chains, only this event - * handler provides guarantee that hook->ops.dev is still accessible, - * so we cannot skip exiting net namespaces. - */ __nft_release_basechain(ctx); } @@ -366,6 +360,9 @@ static int nf_tables_netdev_event(struct notifier_block *this, event != NETDEV_CHANGENAME) return NOTIFY_DONE; + if (!check_net(ctx.net)) + return NOTIFY_DONE; + nft_net = nft_pernet(ctx.net); mutex_lock(&nft_net->commit_mutex); list_for_each_entry(table, &nft_net->tables, list) { -- cgit v1.2.3 From a482c5e00a9b5a194085bcd372ac36141028becb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 12 Oct 2021 08:18:13 -0400 Subject: netfilter: ip6t_rt: fix rt0_hdr parsing in rt_mt6 In rt_mt6(), when it's a nonlinear skb, the 1st skb_header_pointer() only copies sizeof(struct ipv6_rt_hdr) to _route that rh points to. The access by ((const struct rt0_hdr *)rh)->reserved will overflow the buffer. So this access should be moved below the 2nd call to skb_header_pointer(). Besides, after the 2nd skb_header_pointer(), its return value should also be checked, othersize, *rp may cause null-pointer-ref. v1->v2: - clean up some old debugging log. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Dan Carpenter Signed-off-by: Xin Long Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_rt.c | 48 ++++++-------------------------------------- 1 file changed, 6 insertions(+), 42 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 733c83d38b30..4ad8b2032f1f 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -25,12 +25,7 @@ MODULE_AUTHOR("Andras Kis-Szabo "); static inline bool segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) { - bool r; - pr_debug("segsleft_match:%c 0x%x <= 0x%x <= 0x%x\n", - invert ? '!' : ' ', min, id, max); - r = (id >= min && id <= max) ^ invert; - pr_debug(" result %s\n", r ? "PASS" : "FAILED"); - return r; + return (id >= min && id <= max) ^ invert; } static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) @@ -65,30 +60,6 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) return false; } - pr_debug("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); - pr_debug("TYPE %04X ", rh->type); - pr_debug("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); - - pr_debug("IPv6 RT segsleft %02X ", - segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], - rh->segments_left, - !!(rtinfo->invflags & IP6T_RT_INV_SGS))); - pr_debug("type %02X %02X %02X ", - rtinfo->rt_type, rh->type, - (!(rtinfo->flags & IP6T_RT_TYP) || - ((rtinfo->rt_type == rh->type) ^ - !!(rtinfo->invflags & IP6T_RT_INV_TYP)))); - pr_debug("len %02X %04X %02X ", - rtinfo->hdrlen, hdrlen, - !(rtinfo->flags & IP6T_RT_LEN) || - ((rtinfo->hdrlen == hdrlen) ^ - !!(rtinfo->invflags & IP6T_RT_INV_LEN))); - pr_debug("res %02X %02X %02X ", - rtinfo->flags & IP6T_RT_RES, - ((const struct rt0_hdr *)rh)->reserved, - !((rtinfo->flags & IP6T_RT_RES) && - (((const struct rt0_hdr *)rh)->reserved))); - ret = (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], rh->segments_left, !!(rtinfo->invflags & IP6T_RT_INV_SGS))) && @@ -107,22 +78,22 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) reserved), sizeof(_reserved), &_reserved); + if (!rp) { + par->hotdrop = true; + return false; + } ret = (*rp == 0); } - pr_debug("#%d ", rtinfo->addrnr); if (!(rtinfo->flags & IP6T_RT_FST)) { return ret; } else if (rtinfo->flags & IP6T_RT_FST_NSTRICT) { - pr_debug("Not strict "); if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) { - pr_debug("There isn't enough space\n"); return false; } else { unsigned int i = 0; - pr_debug("#%d ", rtinfo->addrnr); for (temp = 0; temp < (unsigned int)((hdrlen - 8) / 16); temp++) { @@ -138,26 +109,20 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) return false; } - if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) { - pr_debug("i=%d temp=%d;\n", i, temp); + if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) i++; - } if (i == rtinfo->addrnr) break; } - pr_debug("i=%d #%d\n", i, rtinfo->addrnr); if (i == rtinfo->addrnr) return ret; else return false; } } else { - pr_debug("Strict "); if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) { - pr_debug("There isn't enough space\n"); return false; } else { - pr_debug("#%d ", rtinfo->addrnr); for (temp = 0; temp < rtinfo->addrnr; temp++) { ap = skb_header_pointer(skb, ptr @@ -173,7 +138,6 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp])) break; } - pr_debug("temp=%d #%d\n", temp, rtinfo->addrnr); if (temp == rtinfo->addrnr && temp == (unsigned int)((hdrlen - 8) / 16)) return ret; -- cgit v1.2.3 From 174c376278949c44aad89c514a6b5db6cee8db59 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 12 Oct 2021 16:54:37 +0200 Subject: netfilter: ipvs: make global sysctl readonly in non-init netns Because the data pointer of net/ipv4/vs/debug_level is not updated per netns, it must be marked as read-only in non-init netns. Fixes: c6d2d445d8de ("IPVS: netns, final patch enabling network name space.") Signed-off-by: Antoine Tenart Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c25097092a06..29ec3ef63edc 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -4090,6 +4090,11 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; tbl[idx++].data = &ipvs->sysctl_schedule_icmp; tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; +#ifdef CONFIG_IP_VS_DEBUG + /* Global sysctls must be ro in non-init netns */ + if (!net_eq(net, &init_net)) + tbl[idx++].mode = 0444; +#endif ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); if (ipvs->sysctl_hdr == NULL) { -- cgit v1.2.3 From d9aaaf223297f6146d9d7f36caca927c92ab855a Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 10 Oct 2021 11:24:39 -0700 Subject: netfilter: ebtables: allocate chainstack on CPU local nodes Keep the per-CPU memory allocated for chainstacks local. Signed-off-by: Davidlohr Bueso Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 83d1798dfbb4..ba045f35114d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -926,7 +926,9 @@ static int translate_table(struct net *net, const char *name, return -ENOMEM; for_each_possible_cpu(i) { newinfo->chainstack[i] = - vmalloc(array_size(udc_cnt, sizeof(*(newinfo->chainstack[0])))); + vmalloc_node(array_size(udc_cnt, + sizeof(*(newinfo->chainstack[0]))), + cpu_to_node(i)); if (!newinfo->chainstack[i]) { while (i) vfree(newinfo->chainstack[--i]); -- cgit v1.2.3