From 1cc5ef91d2ff94d2bf2de3b3585423e8a1051cb6 Mon Sep 17 00:00:00 2001 From: Will McVicker Date: Mon, 24 Aug 2020 19:38:32 +0000 Subject: netfilter: ctnetlink: add a range check for l3/l4 protonum The indexes to the nf_nat_l[34]protos arrays come from userspace. So check the tuple's family, e.g. l3num, when creating the conntrack in order to prevent an OOB memory access during setup. Here is an example kernel panic on 4.14.180 when userspace passes in an index greater than NFPROTO_NUMPROTO. Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Modules linked in:... Process poc (pid: 5614, stack limit = 0x00000000a3933121) CPU: 4 PID: 5614 Comm: poc Tainted: G S W O 4.14.180-g051355490483 Hardware name: Qualcomm Technologies, Inc. SM8150 V2 PM8150 Google Inc. MSM task: 000000002a3dfffe task.stack: 00000000a3933121 pc : __cfi_check_fail+0x1c/0x24 lr : __cfi_check_fail+0x1c/0x24 ... Call trace: __cfi_check_fail+0x1c/0x24 name_to_dev_t+0x0/0x468 nfnetlink_parse_nat_setup+0x234/0x258 ctnetlink_parse_nat_setup+0x4c/0x228 ctnetlink_new_conntrack+0x590/0xc40 nfnetlink_rcv_msg+0x31c/0x4d4 netlink_rcv_skb+0x100/0x184 nfnetlink_rcv+0xf4/0x180 netlink_unicast+0x360/0x770 netlink_sendmsg+0x5a0/0x6a4 ___sys_sendmsg+0x314/0x46c SyS_sendmsg+0xb4/0x108 el0_svc_naked+0x34/0x38 This crash is not happening since 5.4+, however, ctnetlink still allows for creating entries with unsupported layer 3 protocol number. Fixes: c1d10adb4a521 ("[NETFILTER]: Add ctnetlink port for nf_conntrack") Signed-off-by: Will McVicker [pablo@netfilter.org: rebased original patch on top of nf.git] Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 832eabecfbdd..d65846aa8059 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1404,7 +1404,8 @@ ctnetlink_parse_tuple_filter(const struct nlattr * const cda[], if (err < 0) return err; - + if (l3num != NFPROTO_IPV4 && l3num != NFPROTO_IPV6) + return -EOPNOTSUPP; tuple->src.l3num = l3num; if (flags & CTA_FILTER_FLAG(CTA_IP_DST) || -- cgit v1.2.3 From 67cc570edaa02016a8685a06a0ee91f05a6277d9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 27 Aug 2020 19:28:42 +0200 Subject: netfilter: nf_tables: coalesce multiple notifications into one skbuff On x86_64, each notification results in one skbuff allocation which consumes at least 768 bytes due to the skbuff overhead. This patch coalesces several notifications into one single skbuff, so each notification consumes at least ~211 bytes, that ~3.5 times less memory consumption. As a result, this is reducing the chances to exhaust the netlink socket receive buffer. Rule of thumb is that each notification batch only contains netlink messages whose report flag is the same, nfnetlink_send() requires this to do appropriate delivery to userspace, either via unicast (echo mode) or multicast (monitor mode). The skbuff control buffer is used to annotate the report flag for later handling at the new coalescing routine. The batch skbuff notification size is NLMSG_GOODSIZE, using a larger skbuff would allow for more socket receiver buffer savings (to amortize the cost of the skbuff even more), however, going over that size might break userspace applications, so let's be conservative and stick to NLMSG_GOODSIZE. Reported-by: Phil Sutter Acked-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netns/nftables.h | 1 + net/netfilter/nf_tables_api.c | 70 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 58 insertions(+), 13 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index a1a8d45adb42..6c0806bd8d1e 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -8,6 +8,7 @@ struct netns_nftables { struct list_head tables; struct list_head commit_list; struct list_head module_list; + struct list_head notify_list; struct mutex commit_mutex; unsigned int base_seq; u8 gencursor; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b7dc1cbf40ea..4603b667973a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -684,6 +684,18 @@ nla_put_failure: return -1; } +struct nftnl_skb_parms { + bool report; +}; +#define NFT_CB(skb) (*(struct nftnl_skb_parms*)&((skb)->cb)) + +static void nft_notify_enqueue(struct sk_buff *skb, bool report, + struct list_head *notify_list) +{ + NFT_CB(skb).report = report; + list_add_tail(&skb->list, notify_list); +} + static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { struct sk_buff *skb; @@ -715,8 +727,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -1468,8 +1479,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -2807,8 +2817,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -3837,8 +3846,7 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, ctx->report, - gfp_flags); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -4959,8 +4967,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report, - GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -6275,7 +6282,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, goto err; } - nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp); + nft_notify_enqueue(skb, report, &net->nft.notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -7085,8 +7092,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, goto err; } - nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, - ctx->report, GFP_KERNEL); + nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -7695,6 +7701,41 @@ static void nf_tables_commit_release(struct net *net) mutex_unlock(&net->nft.commit_mutex); } +static void nft_commit_notify(struct net *net, u32 portid) +{ + struct sk_buff *batch_skb = NULL, *nskb, *skb; + unsigned char *data; + int len; + + list_for_each_entry_safe(skb, nskb, &net->nft.notify_list, list) { + if (!batch_skb) { +new_batch: + batch_skb = skb; + len = NLMSG_GOODSIZE - skb->len; + list_del(&skb->list); + continue; + } + len -= skb->len; + if (len > 0 && NFT_CB(skb).report == NFT_CB(batch_skb).report) { + data = skb_put(batch_skb, skb->len); + memcpy(data, skb->data, skb->len); + list_del(&skb->list); + kfree_skb(skb); + continue; + } + nfnetlink_send(batch_skb, net, portid, NFNLGRP_NFTABLES, + NFT_CB(batch_skb).report, GFP_KERNEL); + goto new_batch; + } + + if (batch_skb) { + nfnetlink_send(batch_skb, net, portid, NFNLGRP_NFTABLES, + NFT_CB(batch_skb).report, GFP_KERNEL); + } + + WARN_ON_ONCE(!list_empty(&net->nft.notify_list)); +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nft_trans *trans, *next; @@ -7897,6 +7938,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } } + nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_commit_release(net); @@ -8721,6 +8763,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&net->nft.tables); INIT_LIST_HEAD(&net->nft.commit_list); INIT_LIST_HEAD(&net->nft.module_list); + INIT_LIST_HEAD(&net->nft.notify_list); mutex_init(&net->nft.commit_mutex); net->nft.base_seq = 1; net->nft.validate_state = NFT_VALIDATE_SKIP; @@ -8737,6 +8780,7 @@ static void __net_exit nf_tables_exit_net(struct net *net) mutex_unlock(&net->nft.commit_mutex); WARN_ON_ONCE(!list_empty(&net->nft.tables)); WARN_ON_ONCE(!list_empty(&net->nft.module_list)); + WARN_ON_ONCE(!list_empty(&net->nft.notify_list)); } static struct pernet_operations nf_tables_net_ops = { -- cgit v1.2.3 From 6c0d95d1238d944fe54f0bbfc7ec017d78435daa Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 1 Sep 2020 08:56:19 +0200 Subject: netfilter: ctnetlink: fix mark based dump filtering regression conntrack mark based dump filtering may falsely skip entries if a mask is given: If the mask-based check does not filter out the entry, the else-if check is always true and compares the mark without considering the mask. The if/else-if logic seems wrong. Given that the mask during filter setup is implicitly set to 0xffffffff if not specified explicitly, the mark filtering flags seem to just complicate things. Restore the previously used approach by always matching against a zero mask is no filter mark is given. Fixes: cb8aa9a3affb ("netfilter: ctnetlink: add kernel side filtering for dump") Signed-off-by: Martin Willi Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d65846aa8059..c3a4214dc958 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -851,7 +851,6 @@ static int ctnetlink_done(struct netlink_callback *cb) } struct ctnetlink_filter { - u_int32_t cta_flags; u8 family; u_int32_t orig_flags; @@ -906,10 +905,6 @@ static int ctnetlink_parse_tuple_filter(const struct nlattr * const cda[], struct nf_conntrack_zone *zone, u_int32_t flags); -/* applied on filters */ -#define CTA_FILTER_F_CTA_MARK (1 << 0) -#define CTA_FILTER_F_CTA_MARK_MASK (1 << 1) - static struct ctnetlink_filter * ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) { @@ -930,14 +925,10 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) #ifdef CONFIG_NF_CONNTRACK_MARK if (cda[CTA_MARK]) { filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK])); - filter->cta_flags |= CTA_FILTER_FLAG(CTA_MARK); - - if (cda[CTA_MARK_MASK]) { + if (cda[CTA_MARK_MASK]) filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK])); - filter->cta_flags |= CTA_FILTER_FLAG(CTA_MARK_MASK); - } else { + else filter->mark.mask = 0xffffffff; - } } else if (cda[CTA_MARK_MASK]) { err = -EINVAL; goto err_filter; @@ -1117,11 +1108,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) } #ifdef CONFIG_NF_CONNTRACK_MARK - if ((filter->cta_flags & CTA_FILTER_FLAG(CTA_MARK_MASK)) && - (ct->mark & filter->mark.mask) != filter->mark.val) - goto ignore_entry; - else if ((filter->cta_flags & CTA_FILTER_FLAG(CTA_MARK)) && - ct->mark != filter->mark.val) + if ((ct->mark & filter->mark.mask) != filter->mark.val) goto ignore_entry; #endif -- cgit v1.2.3 From 526e81b990e53e31ba40ba304a2285ffd098721f Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Tue, 1 Sep 2020 16:56:02 +0200 Subject: netfilter: conntrack: nf_conncount_init is failing with IPv6 disabled The openvswitch module fails initialization when used in a kernel without IPv6 enabled. nf_conncount_init() fails because the ct code unconditionally tries to initialize the netns IPv6 related bit, regardless of the build option. The change below ignores the IPv6 part if not enabled. Note that the corresponding _put() function already has this IPv6 configuration check. Fixes: 11efd5cb04a1 ("openvswitch: Support conntrack zone limit") Signed-off-by: Eelco Chaudron Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 95f79980348c..47e9319d2cf3 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -565,6 +565,7 @@ static int nf_ct_netns_inet_get(struct net *net) int err; err = nf_ct_netns_do_get(net, NFPROTO_IPV4); +#if IS_ENABLED(CONFIG_IPV6) if (err < 0) goto err1; err = nf_ct_netns_do_get(net, NFPROTO_IPV6); @@ -575,6 +576,7 @@ static int nf_ct_netns_inet_get(struct net *net) err2: nf_ct_netns_put(net, NFPROTO_IPV4); err1: +#endif return err; } -- cgit v1.2.3 From 0c92411bb81de9bc516d6924f50289d8d5f880e5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 3 Sep 2020 19:00:52 +0200 Subject: netfilter: nft_meta: use socket user_ns to retrieve skuid and skgid ... instead of using init_user_ns. Fixes: 96518518cc41 ("netfilter: add nftables") Tested-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 7bc6537f3ccb..b37bd02448d8 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -147,11 +147,11 @@ nft_meta_get_eval_skugid(enum nft_meta_keys key, switch (key) { case NFT_META_SKUID: - *dest = from_kuid_munged(&init_user_ns, + *dest = from_kuid_munged(sock_net(sk)->user_ns, sock->file->f_cred->fsuid); break; case NFT_META_SKGID: - *dest = from_kgid_munged(&init_user_ns, + *dest = from_kgid_munged(sock_net(sk)->user_ns, sock->file->f_cred->fsgid); break; default: -- cgit v1.2.3