diff options
Diffstat (limited to 'net/netfilter')
64 files changed, 1272 insertions, 695 deletions
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 614815a3ed73..f0aa4d7ef499 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -142,8 +142,13 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o # flow table infrastructure obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \ - nf_flow_table_offload.o + nf_flow_table_offload.o nf_flow_table_xdp.o nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o +ifeq ($(CONFIG_NF_FLOW_TABLE),m) +nf_flow_table-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_flow_table_bpf.o +else ifeq ($(CONFIG_NF_FLOW_TABLE),y) +nf_flow_table-$(CONFIG_DEBUG_INFO_BTF) += nf_flow_table_bpf.o +endif obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 3126911f5042..b9f551f02c81 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -655,11 +655,9 @@ void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state, const struct nf_hook_entries *e) { struct sk_buff *skb, *next; - struct list_head sublist; + LIST_HEAD(sublist); int ret; - INIT_LIST_HEAD(&sublist); - list_for_each_entry_safe(skb, next, head, list) { skb_list_del_init(skb); ret = nf_hook_slow(skb, state, e, 0); @@ -815,12 +813,21 @@ int __init netfilter_init(void) if (ret < 0) goto err; +#ifdef CONFIG_LWTUNNEL + ret = netfilter_lwtunnel_init(); + if (ret < 0) + goto err_lwtunnel_pernet; +#endif ret = netfilter_log_init(); if (ret < 0) - goto err_pernet; + goto err_log_pernet; return 0; -err_pernet: +err_log_pernet: +#ifdef CONFIG_LWTUNNEL + netfilter_lwtunnel_fini(); +err_lwtunnel_pernet: +#endif unregister_pernet_subsys(&netfilter_net_ops); err: return ret; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index c7ae4d9bf3d2..61431690cbd5 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -53,12 +53,13 @@ MODULE_DESCRIPTION("core IP set support"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); /* When the nfnl mutex or ip_set_ref_lock is held: */ -#define ip_set_dereference(p) \ - rcu_dereference_protected(p, \ +#define ip_set_dereference(inst) \ + rcu_dereference_protected((inst)->ip_set_list, \ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \ - lockdep_is_held(&ip_set_ref_lock)) + lockdep_is_held(&ip_set_ref_lock) || \ + (inst)->is_deleted) #define ip_set(inst, id) \ - ip_set_dereference((inst)->ip_set_list)[id] + ip_set_dereference(inst)[id] #define ip_set_ref_netlink(inst,id) \ rcu_dereference_raw((inst)->ip_set_list)[id] #define ip_set_dereference_nfnl(p) \ @@ -1133,7 +1134,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info, if (!list) goto cleanup; /* nfnl mutex is held, both lists are valid */ - tmp = ip_set_dereference(inst->ip_set_list); + tmp = ip_set_dereference(inst); memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max); rcu_assign_pointer(inst->ip_set_list, list); /* Make sure all current packets have passed through */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index b6d0dcf3a5c3..dc6ddc4abbe2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1459,18 +1459,18 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, if (ret < 0) goto out_err; - /* Bind the ct retriever */ - RCU_INIT_POINTER(svc->pe, pe); - pe = NULL; - /* Update the virtual service counters */ if (svc->port == FTPPORT) atomic_inc(&ipvs->ftpsvc_counter); else if (svc->port == 0) atomic_inc(&ipvs->nullsvc_counter); - if (svc->pe && svc->pe->conn_out) + if (pe && pe->conn_out) atomic_inc(&ipvs->conn_out_counter); + /* Bind the ct retriever */ + RCU_INIT_POINTER(svc->pe, pe); + pe = NULL; + /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) ipvs->num_services++; @@ -1846,7 +1846,7 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs) #ifdef CONFIG_SYSCTL static int -proc_do_defense_mode(struct ctl_table *table, int write, +proc_do_defense_mode(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct netns_ipvs *ipvs = table->extra2; @@ -1873,7 +1873,7 @@ proc_do_defense_mode(struct ctl_table *table, int write, } static int -proc_do_sync_threshold(struct ctl_table *table, int write, +proc_do_sync_threshold(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct netns_ipvs *ipvs = table->extra2; @@ -1901,7 +1901,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write, } static int -proc_do_sync_ports(struct ctl_table *table, int write, +proc_do_sync_ports(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; @@ -1924,7 +1924,8 @@ proc_do_sync_ports(struct ctl_table *table, int write, return rc; } -static int ipvs_proc_est_cpumask_set(struct ctl_table *table, void *buffer) +static int ipvs_proc_est_cpumask_set(const struct ctl_table *table, + void *buffer) { struct netns_ipvs *ipvs = table->extra2; cpumask_var_t *valp = table->data; @@ -1962,8 +1963,8 @@ out: return ret; } -static int ipvs_proc_est_cpumask_get(struct ctl_table *table, void *buffer, - size_t size) +static int ipvs_proc_est_cpumask_get(const struct ctl_table *table, + void *buffer, size_t size) { struct netns_ipvs *ipvs = table->extra2; cpumask_var_t *valp = table->data; @@ -1983,7 +1984,7 @@ static int ipvs_proc_est_cpumask_get(struct ctl_table *table, void *buffer, return ret; } -static int ipvs_proc_est_cpulist(struct ctl_table *table, int write, +static int ipvs_proc_est_cpulist(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -2010,7 +2011,7 @@ static int ipvs_proc_est_cpulist(struct ctl_table *table, int write, return ret; } -static int ipvs_proc_est_nice(struct ctl_table *table, int write, +static int ipvs_proc_est_nice(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct netns_ipvs *ipvs = table->extra2; @@ -2040,7 +2041,7 @@ static int ipvs_proc_est_nice(struct ctl_table *table, int write, return ret; } -static int ipvs_proc_run_estimation(struct ctl_table *table, int write, +static int ipvs_proc_run_estimation(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct netns_ipvs *ipvs = table->extra2; diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 1e689c714127..83e452916403 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -126,7 +126,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, if (sctph->source != cp->vport || payload_csum || skb->ip_summed == CHECKSUM_PARTIAL) { sctph->source = cp->vport; - if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + if (!skb_is_gso(skb)) sctp_nat_csum(skb, sctph, sctphoff); } else { skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -175,7 +175,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, (skb->ip_summed == CHECKSUM_PARTIAL && !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) { sctph->dest = cp->dport; - if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + if (!skb_is_gso(skb)) sctp_nat_csum(skb, sctph, sctphoff); } else if (skb->ip_summed != CHECKSUM_PARTIAL) { skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 8715617b02fe..4890af4dc263 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -321,7 +321,6 @@ insert_tree(struct net *net, struct nf_conncount_rb *rbconn; struct nf_conncount_tuple *conn; unsigned int count = 0, gc_count = 0; - u8 keylen = data->keylen; bool do_gc = true; spin_lock_bh(&nf_conncount_locks[hash]); @@ -333,7 +332,7 @@ restart: rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node); parent = *rbnode; - diff = key_diff(key, rbconn->key, keylen); + diff = key_diff(key, rbconn->key, data->keylen); if (diff < 0) { rbnode = &((*rbnode)->rb_left); } else if (diff > 0) { @@ -378,7 +377,7 @@ restart: conn->tuple = *tuple; conn->zone = *zone; - memcpy(rbconn->key, key, sizeof(u32) * keylen); + memcpy(rbconn->key, key, sizeof(u32) * data->keylen); nf_conncount_list_init(&rbconn->list); list_add(&conn->node, &rbconn->list.head); @@ -403,7 +402,6 @@ count_tree(struct net *net, struct rb_node *parent; struct nf_conncount_rb *rbconn; unsigned int hash; - u8 keylen = data->keylen; hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS; root = &data->root[hash]; @@ -414,7 +412,7 @@ count_tree(struct net *net, rbconn = rb_entry(parent, struct nf_conncount_rb, node); - diff = key_diff(key, rbconn->key, keylen); + diff = key_diff(key, rbconn->key, data->keylen); if (diff < 0) { parent = rcu_dereference_raw(parent->rb_left); } else if (diff > 0) { @@ -524,11 +522,10 @@ unsigned int nf_conncount_count(struct net *net, } EXPORT_SYMBOL_GPL(nf_conncount_count); -struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, - unsigned int keylen) +struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen) { struct nf_conncount_data *data; - int ret, i; + int i; if (keylen % sizeof(u32) || keylen / sizeof(u32) > MAX_KEYLEN || @@ -541,12 +538,6 @@ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family if (!data) return ERR_PTR(-ENOMEM); - ret = nf_ct_netns_get(net, family); - if (ret < 0) { - kfree(data); - return ERR_PTR(ret); - } - for (i = 0; i < ARRAY_SIZE(data->root); ++i) data->root[i] = RB_ROOT; @@ -583,13 +574,11 @@ static void destroy_tree(struct rb_root *r) } } -void nf_conncount_destroy(struct net *net, unsigned int family, - struct nf_conncount_data *data) +void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data) { unsigned int i; cancel_work_sync(&data->gc_work); - nf_ct_netns_put(net, family); for (i = 0; i < ARRAY_SIZE(data->root); ++i) destroy_tree(&data->root[i]); diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index d2492d050fe6..4a136fc3a9c0 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -32,7 +32,9 @@ * -EINVAL - Passed NULL for bpf_tuple pointer * -EINVAL - opts->reserved is not 0 * -EINVAL - netns_id is less than -1 - * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12) + * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (16) or 12 + * -EINVAL - opts->ct_zone_id set when + opts__sz isn't NF_BPF_CT_OPTS_SZ (16) * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP * -ENONET - No network namespace found for netns_id * -ENOENT - Conntrack lookup could not find entry for tuple @@ -42,6 +44,8 @@ * Values: * IPPROTO_TCP, IPPROTO_UDP * @dir: - connection tracking tuple direction. + * @ct_zone_id - connection tracking zone id. + * @ct_zone_dir - connection tracking zone direction. * @reserved - Reserved member, will be reused for more options in future * Values: * 0 @@ -51,11 +55,13 @@ struct bpf_ct_opts { s32 error; u8 l4proto; u8 dir; - u8 reserved[2]; + u16 ct_zone_id; + u8 ct_zone_dir; + u8 reserved[3]; }; enum { - NF_BPF_CT_OPTS_SZ = 12, + NF_BPF_CT_OPTS_SZ = 16, }; static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple, @@ -104,12 +110,21 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple, u32 timeout) { struct nf_conntrack_tuple otuple, rtuple; + struct nf_conntrack_zone ct_zone; struct nf_conn *ct; int err; - if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] || - opts_len != NF_BPF_CT_OPTS_SZ) + if (!opts || !bpf_tuple) return ERR_PTR(-EINVAL); + if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == 12)) + return ERR_PTR(-EINVAL); + if (opts_len == NF_BPF_CT_OPTS_SZ) { + if (opts->reserved[0] || opts->reserved[1] || opts->reserved[2]) + return ERR_PTR(-EINVAL); + } else { + if (opts->ct_zone_id) + return ERR_PTR(-EINVAL); + } if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS)) return ERR_PTR(-EINVAL); @@ -130,7 +145,16 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple, return ERR_PTR(-ENONET); } - ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple, + if (opts_len == NF_BPF_CT_OPTS_SZ) { + if (opts->ct_zone_dir == 0) + opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR; + nf_ct_zone_init(&ct_zone, + opts->ct_zone_id, opts->ct_zone_dir, 0); + } else { + ct_zone = nf_ct_zone_dflt; + } + + ct = nf_conntrack_alloc(net, &ct_zone, &otuple, &rtuple, GFP_ATOMIC); if (IS_ERR(ct)) goto out; @@ -152,12 +176,21 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, { struct nf_conntrack_tuple_hash *hash; struct nf_conntrack_tuple tuple; + struct nf_conntrack_zone ct_zone; struct nf_conn *ct; int err; - if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] || - opts_len != NF_BPF_CT_OPTS_SZ) + if (!opts || !bpf_tuple) return ERR_PTR(-EINVAL); + if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == 12)) + return ERR_PTR(-EINVAL); + if (opts_len == NF_BPF_CT_OPTS_SZ) { + if (opts->reserved[0] || opts->reserved[1] || opts->reserved[2]) + return ERR_PTR(-EINVAL); + } else { + if (opts->ct_zone_id) + return ERR_PTR(-EINVAL); + } if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP)) return ERR_PTR(-EPROTO); if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS)) @@ -174,7 +207,16 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, return ERR_PTR(-ENONET); } - hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple); + if (opts_len == NF_BPF_CT_OPTS_SZ) { + if (opts->ct_zone_dir == 0) + opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR; + nf_ct_zone_init(&ct_zone, + opts->ct_zone_id, opts->ct_zone_dir, 0); + } else { + ct_zone = nf_ct_zone_dflt; + } + + hash = nf_conntrack_find_get(net, &ct_zone, &tuple); if (opts->netns_id >= 0) put_net(net); if (!hash) @@ -245,7 +287,7 @@ __bpf_kfunc_start_defs(); * @opts - Additional options for allocation (documented above) * Cannot be NULL * @opts__sz - Length of the bpf_ct_opts structure - * Must be NF_BPF_CT_OPTS_SZ (12) + * Must be NF_BPF_CT_OPTS_SZ (16) or 12 */ __bpf_kfunc struct nf_conn___init * bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, @@ -279,7 +321,7 @@ bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, * @opts - Additional options for lookup (documented above) * Cannot be NULL * @opts__sz - Length of the bpf_ct_opts structure - * Must be NF_BPF_CT_OPTS_SZ (12) + * Must be NF_BPF_CT_OPTS_SZ (16) or 12 */ __bpf_kfunc struct nf_conn * bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, @@ -312,7 +354,7 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, * @opts - Additional options for allocation (documented above) * Cannot be NULL * @opts__sz - Length of the bpf_ct_opts structure - * Must be NF_BPF_CT_OPTS_SZ (12) + * Must be NF_BPF_CT_OPTS_SZ (16) or 12 */ __bpf_kfunc struct nf_conn___init * bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, @@ -347,7 +389,7 @@ bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, * @opts - Additional options for lookup (documented above) * Cannot be NULL * @opts__sz - Length of the bpf_ct_opts structure - * Must be NF_BPF_CT_OPTS_SZ (12) + * Must be NF_BPF_CT_OPTS_SZ (16) or 12 */ __bpf_kfunc struct nf_conn * bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7ac20750c127..d3cb53b008f5 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1090,7 +1090,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) * A conntrack entry can be inserted to the connection tracking table * if there is no existing entry with an identical tuple. * - * If there is one, @skb (and the assocated, unconfirmed conntrack) has + * If there is one, @skb (and the associated, unconfirmed conntrack) has * to be dropped. In case @skb is retransmitted, next conntrack lookup * will find the already-existing entry. * @@ -1722,7 +1722,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC, hash); if (IS_ERR(ct)) - return (struct nf_conntrack_tuple_hash *)ct; + return ERR_CAST(ct); if (!nf_ct_add_synproxy(ct, tmpl)) { nf_conntrack_free(ct); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 3b846cbdc050..123e2e933e9b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1579,9 +1579,6 @@ static int ctnetlink_flush_conntrack(struct net *net, }; if (ctnetlink_needs_filter(family, cda)) { - if (cda[CTA_FILTER]) - return -EOPNOTSUPP; - filter = ctnetlink_alloc_filter(cda, family); if (IS_ERR(filter)) return PTR_ERR(filter); @@ -1610,14 +1607,14 @@ static int ctnetlink_del_conntrack(struct sk_buff *skb, if (err < 0) return err; - if (cda[CTA_TUPLE_ORIG]) + if (cda[CTA_TUPLE_ORIG] && !cda[CTA_FILTER]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, family, &zone); - else if (cda[CTA_TUPLE_REPLY]) + else if (cda[CTA_TUPLE_REPLY] && !cda[CTA_FILTER]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, family, &zone); else { - u_int8_t u3 = info->nfmsg->version ? family : AF_UNSPEC; + u8 u3 = info->nfmsg->version || cda[CTA_FILTER] ? family : AF_UNSPEC; return ctnetlink_flush_conntrack(info->net, cda, NETLINK_CB(skb).portid, @@ -3420,7 +3417,8 @@ static int ctnetlink_del_expect(struct sk_buff *skb, if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); - if (ntohl(id) != (u32)(unsigned long)exp) { + + if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); return -ENOENT; } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 74112e9c5dab..7d4f0fa8b609 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -22,9 +22,6 @@ #include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_timestamp.h> -#ifdef CONFIG_LWTUNNEL -#include <net/netfilter/nf_hooks_lwtunnel.h> -#endif #include <linux/rculist_nulls.h> static bool enable_hooks __read_mostly; @@ -527,7 +524,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_count); static unsigned int nf_conntrack_htable_size_user __read_mostly; static int -nf_conntrack_hash_sysctl(struct ctl_table *table, int write, +nf_conntrack_hash_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -612,9 +609,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_GRE, NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM, #endif -#ifdef CONFIG_LWTUNNEL - NF_SYSCTL_CT_LWTUNNEL, -#endif NF_SYSCTL_CT_LAST_SYSCTL, }; @@ -946,15 +940,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, }, #endif -#ifdef CONFIG_LWTUNNEL - [NF_SYSCTL_CT_LWTUNNEL] = { - .procname = "nf_hooks_lwtunnel", - .data = NULL, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = nf_hooks_lwtunnel_sysctl_handler, - }, -#endif }; static struct ctl_table nf_ct_netfilter_table[] = { diff --git a/net/netfilter/nf_flow_table_bpf.c b/net/netfilter/nf_flow_table_bpf.c new file mode 100644 index 000000000000..4a5f5195f2d2 --- /dev/null +++ b/net/netfilter/nf_flow_table_bpf.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Unstable Flow Table Helpers for XDP hook + * + * These are called from the XDP programs. + * Note that it is allowed to break compatibility for these functions since + * the interface they are exposed through to BPF programs is explicitly + * unstable. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <net/netfilter/nf_flow_table.h> +#include <linux/bpf.h> +#include <linux/btf.h> +#include <net/xdp.h> + +/* bpf_flowtable_opts - options for bpf flowtable helpers + * @error: out parameter, set for any encountered error + */ +struct bpf_flowtable_opts { + s32 error; +}; + +enum { + NF_BPF_FLOWTABLE_OPTS_SZ = 4, +}; + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in nf_flow_table BTF"); + +__bpf_kfunc_start_defs(); + +static struct flow_offload_tuple_rhash * +bpf_xdp_flow_tuple_lookup(struct net_device *dev, + struct flow_offload_tuple *tuple, __be16 proto) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct nf_flowtable *nf_flow_table; + struct flow_offload *nf_flow; + + nf_flow_table = nf_flowtable_by_dev(dev); + if (!nf_flow_table) + return ERR_PTR(-ENOENT); + + tuplehash = flow_offload_lookup(nf_flow_table, tuple); + if (!tuplehash) + return ERR_PTR(-ENOENT); + + nf_flow = container_of(tuplehash, struct flow_offload, + tuplehash[tuplehash->tuple.dir]); + flow_offload_refresh(nf_flow_table, nf_flow, false); + + return tuplehash; +} + +__bpf_kfunc struct flow_offload_tuple_rhash * +bpf_xdp_flow_lookup(struct xdp_md *ctx, struct bpf_fib_lookup *fib_tuple, + struct bpf_flowtable_opts *opts, u32 opts_len) +{ + struct xdp_buff *xdp = (struct xdp_buff *)ctx; + struct flow_offload_tuple tuple = { + .iifidx = fib_tuple->ifindex, + .l3proto = fib_tuple->family, + .l4proto = fib_tuple->l4_protocol, + .src_port = fib_tuple->sport, + .dst_port = fib_tuple->dport, + }; + struct flow_offload_tuple_rhash *tuplehash; + __be16 proto; + + if (opts_len != NF_BPF_FLOWTABLE_OPTS_SZ) { + opts->error = -EINVAL; + return NULL; + } + + switch (fib_tuple->family) { + case AF_INET: + tuple.src_v4.s_addr = fib_tuple->ipv4_src; + tuple.dst_v4.s_addr = fib_tuple->ipv4_dst; + proto = htons(ETH_P_IP); + break; + case AF_INET6: + tuple.src_v6 = *(struct in6_addr *)&fib_tuple->ipv6_src; + tuple.dst_v6 = *(struct in6_addr *)&fib_tuple->ipv6_dst; + proto = htons(ETH_P_IPV6); + break; + default: + opts->error = -EAFNOSUPPORT; + return NULL; + } + + tuplehash = bpf_xdp_flow_tuple_lookup(xdp->rxq->dev, &tuple, proto); + if (IS_ERR(tuplehash)) { + opts->error = PTR_ERR(tuplehash); + return NULL; + } + + return tuplehash; +} + +__diag_pop() + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(nf_ft_kfunc_set) +BTF_ID_FLAGS(func, bpf_xdp_flow_lookup, KF_TRUSTED_ARGS | KF_RET_NULL) +BTF_KFUNCS_END(nf_ft_kfunc_set) + +static const struct btf_kfunc_id_set nf_flow_kfunc_set = { + .owner = THIS_MODULE, + .set = &nf_ft_kfunc_set, +}; + +int nf_flow_register_bpf(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, + &nf_flow_kfunc_set); +} +EXPORT_SYMBOL_GPL(nf_flow_register_bpf); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 5c1ff07eaee0..df72b0376970 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -670,8 +670,14 @@ static int __init nf_flow_table_module_init(void) if (ret) goto out_offload; + ret = nf_flow_register_bpf(); + if (ret) + goto out_bpf; + return 0; +out_bpf: + nf_flow_table_offload_exit(); out_offload: unregister_pernet_subsys(&nf_flow_table_net_ops); return ret; diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 6eef15648b7b..b0f199171932 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -17,6 +17,9 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, switch (skb->protocol) { case htons(ETH_P_8021Q): + if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth))) + return NF_ACCEPT; + veth = (struct vlan_ethhdr *)skb_mac_header(skb); proto = veth->h_vlan_encapsulated_proto; break; diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index c2c005234dcd..98edcaa37b38 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -281,6 +281,9 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto, switch (skb->protocol) { case htons(ETH_P_8021Q): + if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth))) + return false; + veth = (struct vlan_ethhdr *)skb_mac_header(skb); if (veth->h_vlan_encapsulated_proto == proto) { *offset += VLAN_HLEN; diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index a010b25076ca..e06bc36f49fe 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -841,8 +841,8 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, struct list_head *block_cb_list) { struct flow_cls_offload cls_flow = {}; + struct netlink_ext_ack extack = {}; struct flow_block_cb *block_cb; - struct netlink_ext_ack extack; __be16 proto = ETH_P_ALL; int err, i = 0; @@ -1192,7 +1192,7 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, int err; if (!nf_flowtable_hw_offload(flowtable)) - return 0; + return nf_flow_offload_xdp_setup(flowtable, dev, cmd); if (dev->netdev_ops->ndo_setup_tc) err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, diff --git a/net/netfilter/nf_flow_table_xdp.c b/net/netfilter/nf_flow_table_xdp.c new file mode 100644 index 000000000000..e1252d042699 --- /dev/null +++ b/net/netfilter/nf_flow_table_xdp.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/rhashtable.h> +#include <linux/netdevice.h> +#include <net/flow_offload.h> +#include <net/netfilter/nf_flow_table.h> + +struct flow_offload_xdp_ft { + struct list_head head; + struct nf_flowtable *ft; + struct rcu_head rcuhead; +}; + +struct flow_offload_xdp { + struct hlist_node hnode; + unsigned long net_device_addr; + struct list_head head; +}; + +#define NF_XDP_HT_BITS 4 +static DEFINE_HASHTABLE(nf_xdp_hashtable, NF_XDP_HT_BITS); +static DEFINE_MUTEX(nf_xdp_hashtable_lock); + +/* caller must hold rcu read lock */ +struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev) +{ + unsigned long key = (unsigned long)dev; + struct flow_offload_xdp *iter; + + hash_for_each_possible_rcu(nf_xdp_hashtable, iter, hnode, key) { + if (key == iter->net_device_addr) { + struct flow_offload_xdp_ft *ft_elem; + + /* The user is supposed to insert a given net_device + * just into a single nf_flowtable so we always return + * the first element here. + */ + ft_elem = list_first_or_null_rcu(&iter->head, + struct flow_offload_xdp_ft, + head); + return ft_elem ? ft_elem->ft : NULL; + } + } + + return NULL; +} + +static int nf_flowtable_by_dev_insert(struct nf_flowtable *ft, + const struct net_device *dev) +{ + struct flow_offload_xdp *iter, *elem = NULL; + unsigned long key = (unsigned long)dev; + struct flow_offload_xdp_ft *ft_elem; + + ft_elem = kzalloc(sizeof(*ft_elem), GFP_KERNEL_ACCOUNT); + if (!ft_elem) + return -ENOMEM; + + ft_elem->ft = ft; + + mutex_lock(&nf_xdp_hashtable_lock); + + hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) { + if (key == iter->net_device_addr) { + elem = iter; + break; + } + } + + if (!elem) { + elem = kzalloc(sizeof(*elem), GFP_KERNEL_ACCOUNT); + if (!elem) + goto err_unlock; + + elem->net_device_addr = key; + INIT_LIST_HEAD(&elem->head); + hash_add_rcu(nf_xdp_hashtable, &elem->hnode, key); + } + list_add_tail_rcu(&ft_elem->head, &elem->head); + + mutex_unlock(&nf_xdp_hashtable_lock); + + return 0; + +err_unlock: + mutex_unlock(&nf_xdp_hashtable_lock); + kfree(ft_elem); + + return -ENOMEM; +} + +static void nf_flowtable_by_dev_remove(struct nf_flowtable *ft, + const struct net_device *dev) +{ + struct flow_offload_xdp *iter, *elem = NULL; + unsigned long key = (unsigned long)dev; + + mutex_lock(&nf_xdp_hashtable_lock); + + hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) { + if (key == iter->net_device_addr) { + elem = iter; + break; + } + } + + if (elem) { + struct flow_offload_xdp_ft *ft_elem, *ft_next; + + list_for_each_entry_safe(ft_elem, ft_next, &elem->head, head) { + if (ft_elem->ft == ft) { + list_del_rcu(&ft_elem->head); + kfree_rcu(ft_elem, rcuhead); + } + } + + if (list_empty(&elem->head)) + hash_del_rcu(&elem->hnode); + else + elem = NULL; + } + + mutex_unlock(&nf_xdp_hashtable_lock); + + if (elem) { + synchronize_rcu(); + kfree(elem); + } +} + +int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd) +{ + switch (cmd) { + case FLOW_BLOCK_BIND: + return nf_flowtable_by_dev_insert(flowtable, dev); + case FLOW_BLOCK_UNBIND: + nf_flowtable_by_dev_remove(flowtable, dev); + return 0; + } + + WARN_ON_ONCE(1); + return 0; +} diff --git a/net/netfilter/nf_hooks_lwtunnel.c b/net/netfilter/nf_hooks_lwtunnel.c index 00e89ffd78f6..2d890dd04ff8 100644 --- a/net/netfilter/nf_hooks_lwtunnel.c +++ b/net/netfilter/nf_hooks_lwtunnel.c @@ -3,6 +3,9 @@ #include <linux/sysctl.h> #include <net/lwtunnel.h> #include <net/netfilter/nf_hooks_lwtunnel.h> +#include <linux/netfilter.h> + +#include "nf_internals.h" static inline int nf_hooks_lwtunnel_get(void) { @@ -25,7 +28,7 @@ static inline int nf_hooks_lwtunnel_set(int enable) } #ifdef CONFIG_SYSCTL -int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write, +int nf_hooks_lwtunnel_sysctl_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int proc_nf_hooks_lwtunnel_enabled = 0; @@ -50,4 +53,71 @@ int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write, return ret; } EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler); + +static struct ctl_table nf_lwtunnel_sysctl_table[] = { + { + .procname = "nf_hooks_lwtunnel", + .data = NULL, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = nf_hooks_lwtunnel_sysctl_handler, + }, +}; + +static int __net_init nf_lwtunnel_net_init(struct net *net) +{ + struct ctl_table_header *hdr; + struct ctl_table *table; + + table = nf_lwtunnel_sysctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(nf_lwtunnel_sysctl_table, + sizeof(nf_lwtunnel_sysctl_table), + GFP_KERNEL); + if (!table) + goto err_alloc; + } + + hdr = register_net_sysctl_sz(net, "net/netfilter", table, + ARRAY_SIZE(nf_lwtunnel_sysctl_table)); + if (!hdr) + goto err_reg; + + net->nf.nf_lwtnl_dir_header = hdr; + + return 0; +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void __net_exit nf_lwtunnel_net_exit(struct net *net) +{ + const struct ctl_table *table; + + table = net->nf.nf_lwtnl_dir_header->ctl_table_arg; + unregister_net_sysctl_table(net->nf.nf_lwtnl_dir_header); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static struct pernet_operations nf_lwtunnel_net_ops = { + .init = nf_lwtunnel_net_init, + .exit = nf_lwtunnel_net_exit, +}; + +int __init netfilter_lwtunnel_init(void) +{ + return register_pernet_subsys(&nf_lwtunnel_net_ops); +} + +void netfilter_lwtunnel_fini(void) +{ + unregister_pernet_subsys(&nf_lwtunnel_net_ops); +} +#else +int __init netfilter_lwtunnel_init(void) { return 0; } +void netfilter_lwtunnel_fini(void) {} #endif /* CONFIG_SYSCTL */ diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 832ae64179f0..25403023060b 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -29,6 +29,12 @@ void nf_queue_nf_hook_drop(struct net *net); /* nf_log.c */ int __init netfilter_log_init(void); +#ifdef CONFIG_LWTUNNEL +/* nf_hooks_lwtunnel.c */ +int __init netfilter_lwtunnel_init(void); +void netfilter_lwtunnel_fini(void); +#endif + /* core.c */ void nf_hook_entries_delete_raw(struct nf_hook_entries __rcu **pp, const struct nf_hook_ops *reg); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 769fd7680fac..6dd0de33eebd 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -408,7 +408,7 @@ static struct ctl_table nf_log_sysctl_ftable[] = { }, }; -static int nf_log_proc_dostring(struct ctl_table *table, int write, +static int nf_log_proc_dostring(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { const struct nf_logger *logger; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 016c816d91cb..6d8da6dddf99 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -1104,7 +1104,7 @@ int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops, if (!nat_proto_net->nat_hook_ops) { WARN_ON(nat_proto_net->users != 0); - nat_ops = kmemdup(orig_nat_ops, sizeof(*orig_nat_ops) * ops_count, GFP_KERNEL); + nat_ops = kmemdup_array(orig_nat_ops, ops_count, sizeof(*orig_nat_ops), GFP_KERNEL); if (!nat_ops) { mutex_unlock(&nf_nat_proto_mutex); return -ENOMEM; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index be3b4c90d2ed..57259b5f3ef5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -146,6 +146,8 @@ static void nft_ctx_init(struct nft_ctx *ctx, ctx->report = nlmsg_report(nlh); ctx->flags = nlh->nlmsg_flags; ctx->seq = nlh->nlmsg_seq; + + bitmap_zero(ctx->reg_inited, NFT_REG32_NUM); } static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, @@ -153,14 +155,18 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, { struct nft_trans *trans; - trans = kzalloc(sizeof(struct nft_trans) + size, gfp); + trans = kzalloc(size, gfp); if (trans == NULL) return NULL; INIT_LIST_HEAD(&trans->list); - INIT_LIST_HEAD(&trans->binding_list); trans->msg_type = msg_type; - trans->ctx = *ctx; + + trans->net = ctx->net; + trans->table = ctx->table; + trans->seq = ctx->seq; + trans->flags = ctx->flags; + trans->report = ctx->report; return trans; } @@ -171,10 +177,26 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); } +static struct nft_trans_binding *nft_trans_get_binding(struct nft_trans *trans) +{ + switch (trans->msg_type) { + case NFT_MSG_NEWCHAIN: + case NFT_MSG_NEWSET: + return container_of(trans, struct nft_trans_binding, nft_trans); + } + + return NULL; +} + static void nft_trans_list_del(struct nft_trans *trans) { + struct nft_trans_binding *trans_binding; + list_del(&trans->list); - list_del(&trans->binding_list); + + trans_binding = nft_trans_get_binding(trans); + if (trans_binding) + list_del(&trans_binding->binding_list); } static void nft_trans_destroy(struct nft_trans *trans) @@ -236,7 +258,7 @@ static void __nft_chain_trans_bind(const struct nft_ctx *ctx, nft_trans_chain_bound(trans) = bind; break; case NFT_MSG_NEWRULE: - if (trans->ctx.chain == chain) + if (nft_trans_rule_chain(trans) == chain) nft_trans_rule_bound(trans) = bind; break; } @@ -372,21 +394,31 @@ static void nf_tables_unregister_hook(struct net *net, static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) { struct nftables_pernet *nft_net = nft_pernet(net); + struct nft_trans_binding *binding; + struct nft_trans_set *trans_set; + + list_add_tail(&trans->list, &nft_net->commit_list); + + binding = nft_trans_get_binding(trans); + if (!binding) + return; switch (trans->msg_type) { case NFT_MSG_NEWSET: + trans_set = nft_trans_container_set(trans); + if (!nft_trans_set_update(trans) && nft_set_is_anonymous(nft_trans_set(trans))) - list_add_tail(&trans->binding_list, &nft_net->binding_list); + list_add_tail(&binding->binding_list, &nft_net->binding_list); + + list_add_tail(&trans_set->list_trans_newset, &nft_net->commit_set_list); break; case NFT_MSG_NEWCHAIN: if (!nft_trans_chain_update(trans) && nft_chain_binding(nft_trans_chain(trans))) - list_add_tail(&trans->binding_list, &nft_net->binding_list); + list_add_tail(&binding->binding_list, &nft_net->binding_list); break; } - - list_add_tail(&trans->list, &nft_net->commit_list); } static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) @@ -416,11 +448,28 @@ static int nft_deltable(struct nft_ctx *ctx) return err; } -static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) +static struct nft_trans * +nft_trans_alloc_chain(const struct nft_ctx *ctx, int msg_type) { + struct nft_trans_chain *trans_chain; struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain)); + if (!trans) + return NULL; + + trans_chain = nft_trans_container_chain(trans); + INIT_LIST_HEAD(&trans_chain->nft_trans_binding.binding_list); + trans_chain->chain = ctx->chain; + + return trans; +} + +static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc_chain(ctx, msg_type); if (trans == NULL) return ERR_PTR(-ENOMEM); @@ -432,7 +481,6 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID])); } } - nft_trans_chain(trans) = ctx->chain; nft_trans_commit_list_add_tail(ctx->net, trans); return trans; @@ -505,6 +553,7 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID])); } nft_trans_rule(trans) = rule; + nft_trans_rule_chain(trans) = ctx->chain; nft_trans_commit_list_add_tail(ctx->net, trans); return trans; @@ -560,12 +609,17 @@ static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, struct nft_set *set, const struct nft_set_desc *desc) { + struct nft_trans_set *trans_set; struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set)); if (trans == NULL) return -ENOMEM; + trans_set = nft_trans_container_set(trans); + INIT_LIST_HEAD(&trans_set->nft_trans_binding.binding_list); + INIT_LIST_HEAD(&trans_set->list_trans_newset); + if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) { nft_trans_set_id(trans) = ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); @@ -1217,11 +1271,11 @@ static bool nft_table_pending_update(const struct nft_ctx *ctx) return true; list_for_each_entry(trans, &nft_net->commit_list, list) { - if (trans->ctx.table == ctx->table && + if (trans->table == ctx->table && ((trans->msg_type == NFT_MSG_NEWCHAIN && nft_trans_chain_update(trans)) || (trans->msg_type == NFT_MSG_DELCHAIN && - nft_is_base_chain(trans->ctx.chain)))) + nft_is_base_chain(nft_trans_chain(trans))))) return true; } @@ -1615,15 +1669,15 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info, return nft_flush_table(&ctx); } -static void nf_tables_table_destroy(struct nft_ctx *ctx) +static void nf_tables_table_destroy(struct nft_table *table) { - if (WARN_ON(ctx->table->use > 0)) + if (WARN_ON(table->use > 0)) return; - rhltable_destroy(&ctx->table->chains_ht); - kfree(ctx->table->name); - kfree(ctx->table->udata); - kfree(ctx->table); + rhltable_destroy(&table->chains_ht); + kfree(table->name); + kfree(table->udata); + kfree(table); } void nft_register_chain_type(const struct nft_chain_type *ctype) @@ -2049,18 +2103,19 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) return newstats; } -static void nft_chain_stats_replace(struct nft_trans *trans) +static void nft_chain_stats_replace(struct nft_trans_chain *trans) { - struct nft_base_chain *chain = nft_base_chain(trans->ctx.chain); + const struct nft_trans *t = &trans->nft_trans_binding.nft_trans; + struct nft_base_chain *chain = nft_base_chain(trans->chain); - if (!nft_trans_chain_stats(trans)) + if (!trans->stats) return; - nft_trans_chain_stats(trans) = - rcu_replace_pointer(chain->stats, nft_trans_chain_stats(trans), - lockdep_commit_lock_is_held(trans->ctx.net)); + trans->stats = + rcu_replace_pointer(chain->stats, trans->stats, + lockdep_commit_lock_is_held(t->net)); - if (!nft_trans_chain_stats(trans)) + if (!trans->stats) static_branch_inc(&nft_counters_enabled); } @@ -2078,9 +2133,9 @@ static void nf_tables_chain_free_chain_rules(struct nft_chain *chain) kvfree(chain->blob_next); } -void nf_tables_chain_destroy(struct nft_ctx *ctx) +void nf_tables_chain_destroy(struct nft_chain *chain) { - struct nft_chain *chain = ctx->chain; + const struct nft_table *table = chain->table; struct nft_hook *hook, *next; if (WARN_ON(chain->use > 0)) @@ -2092,7 +2147,7 @@ void nf_tables_chain_destroy(struct nft_ctx *ctx) if (nft_is_base_chain(chain)) { struct nft_base_chain *basechain = nft_base_chain(chain); - if (nft_base_chain_netdev(ctx->family, basechain->ops.hooknum)) { + if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) { list_for_each_entry_safe(hook, next, &basechain->hook_list, list) { list_del_rcu(&hook->list); @@ -2581,7 +2636,7 @@ err_chain_add: err_trans: nft_use_dec_restore(&table->use); err_destroy_chain: - nf_tables_chain_destroy(ctx); + nf_tables_chain_destroy(chain); return err; } @@ -2698,8 +2753,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, } err = -ENOMEM; - trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN, - sizeof(struct nft_trans_chain)); + trans = nft_trans_alloc_chain(ctx, NFT_MSG_NEWCHAIN); if (trans == NULL) goto err_trans; @@ -2725,7 +2779,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, err = -EEXIST; list_for_each_entry(tmp, &nft_net->commit_list, list) { if (tmp->msg_type == NFT_MSG_NEWCHAIN && - tmp->ctx.table == table && + tmp->table == table && nft_trans_chain_update(tmp) && nft_trans_chain_name(tmp) && strcmp(name, nft_trans_chain_name(tmp)) == 0) { @@ -2774,13 +2828,11 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net, struct nft_trans *trans; list_for_each_entry(trans, &nft_net->commit_list, list) { - struct nft_chain *chain = trans->ctx.chain; - if (trans->msg_type == NFT_MSG_NEWCHAIN && - chain->table == table && + nft_trans_chain(trans)->table == table && id == nft_trans_chain_id(trans) && - nft_active_genmask(chain, genmask)) - return chain; + nft_active_genmask(nft_trans_chain(trans), genmask)) + return nft_trans_chain(trans); } return ERR_PTR(-ENOENT); } @@ -2915,8 +2967,7 @@ static int nft_delchain_hook(struct nft_ctx *ctx, list_move(&hook->list, &chain_del_list); } - trans = nft_trans_alloc(ctx, NFT_MSG_DELCHAIN, - sizeof(struct nft_trans_chain)); + trans = nft_trans_alloc_chain(ctx, NFT_MSG_DELCHAIN); if (!trans) { err = -ENOMEM; goto err_chain_del_hook; @@ -3823,10 +3874,18 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r nf_tables_rule_destroy(ctx, rule); } +/** nft_chain_validate - loop detection and hook validation + * + * @ctx: context containing call depth and base chain + * @chain: chain to validate + * + * Walk through the rules of the given chain and chase all jumps/gotos + * and set lookups until either the jump limit is hit or all reachable + * chains have been validated. + */ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) { struct nft_expr *expr, *last; - const struct nft_data *data; struct nft_rule *rule; int err; @@ -3844,7 +3903,10 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) if (!expr->ops->validate) continue; - err = expr->ops->validate(ctx, expr, &data); + /* This may call nft_chain_validate() recursively, + * callers that do so must increment ctx->level. + */ + err = expr->ops->validate(ctx, expr); if (err < 0) return err; } @@ -4188,7 +4250,7 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net, list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->msg_type == NFT_MSG_NEWRULE && - trans->ctx.chain == chain && + nft_trans_rule_chain(trans) == chain && id == nft_trans_rule_id(trans)) return nft_trans_rule(trans); } @@ -4430,17 +4492,16 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net, { struct nftables_pernet *nft_net = nft_pernet(net); u32 id = ntohl(nla_get_be32(nla)); - struct nft_trans *trans; + struct nft_trans_set *trans; - list_for_each_entry(trans, &nft_net->commit_list, list) { - if (trans->msg_type == NFT_MSG_NEWSET) { - struct nft_set *set = nft_trans_set(trans); + /* its likely the id we need is at the tail, not at start */ + list_for_each_entry_reverse(trans, &nft_net->commit_set_list, list_trans_newset) { + struct nft_set *set = trans->set; - if (id == nft_trans_set_id(trans) && - set->table == table && - nft_active_genmask(set, genmask)) - return set; - } + if (id == trans->set_id && + set->table == table && + nft_active_genmask(set, genmask)) + return set; } return ERR_PTR(-ENOENT); } @@ -4532,7 +4593,7 @@ int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) return -ERANGE; ms *= NSEC_PER_MSEC; - *result = nsecs_to_jiffies64(ms); + *result = nsecs_to_jiffies64(ms) ? : !!ms; return 0; } @@ -5633,12 +5694,8 @@ const struct nft_set_ext_type nft_set_ext_types[] = { .align = __alignof__(u8), }, [NFT_SET_EXT_TIMEOUT] = { - .len = sizeof(u64), - .align = __alignof__(u64), - }, - [NFT_SET_EXT_EXPIRATION] = { - .len = sizeof(u64), - .align = __alignof__(u64), + .len = sizeof(struct nft_timeout), + .align = __alignof__(struct nft_timeout), }, [NFT_SET_EXT_USERDATA] = { .len = sizeof(struct nft_userdata), @@ -5740,8 +5797,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext), - set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE, - set->dlen) < 0) + nft_set_datatype(set), set->dlen) < 0) goto nla_put_failure; if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS) && @@ -5758,25 +5814,32 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, htonl(*nft_set_ext_flags(ext)))) goto nla_put_failure; - if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && - nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, - nf_jiffies64_to_msecs(*nft_set_ext_timeout(ext)), - NFTA_SET_ELEM_PAD)) - goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) { + u64 timeout = READ_ONCE(nft_set_ext_timeout(ext)->timeout); + u64 set_timeout = READ_ONCE(set->timeout); + __be64 msecs = 0; - if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { - u64 expires, now = get_jiffies_64(); + if (set_timeout != timeout) { + msecs = nf_jiffies64_to_msecs(timeout); + if (nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, msecs, + NFTA_SET_ELEM_PAD)) + goto nla_put_failure; + } - expires = *nft_set_ext_expiration(ext); - if (time_before64(now, expires)) - expires -= now; - else - expires = 0; + if (timeout > 0) { + u64 expires, now = get_jiffies_64(); - if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION, - nf_jiffies64_to_msecs(expires), - NFTA_SET_ELEM_PAD)) - goto nla_put_failure; + expires = READ_ONCE(nft_set_ext_timeout(ext)->expiration); + if (time_before64(now, expires)) + expires -= now; + else + expires = 0; + + if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION, + nf_jiffies64_to_msecs(expires), + NFTA_SET_ELEM_PAD)) + goto nla_put_failure; + } } if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) { @@ -6439,13 +6502,14 @@ struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set, nft_set_ext_data(ext), data, set->dlen) < 0) goto err_ext_check; - if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { - *nft_set_ext_expiration(ext) = get_jiffies_64() + expiration; + if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) { + nft_set_ext_timeout(ext)->timeout = timeout; + if (expiration == 0) - *nft_set_ext_expiration(ext) += timeout; + expiration = timeout; + + nft_set_ext_timeout(ext)->expiration = get_jiffies_64() + expiration; } - if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) - *nft_set_ext_timeout(ext) = timeout; return elem; @@ -6788,6 +6852,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, struct nft_data_desc desc; enum nft_registers dreg; struct nft_trans *trans; + u8 update_flags; u64 expiration; u64 timeout; int err, i; @@ -6856,17 +6921,23 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return err; } else if (set->flags & NFT_SET_TIMEOUT && !(flags & NFT_SET_ELEM_INTERVAL_END)) { - timeout = READ_ONCE(set->timeout); + timeout = set->timeout; } expiration = 0; if (nla[NFTA_SET_ELEM_EXPIRATION] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; + if (timeout == 0) + return -EOPNOTSUPP; + err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_EXPIRATION], &expiration); if (err) return err; + + if (expiration > timeout) + return -ERANGE; } if (nla[NFTA_SET_ELEM_EXPR]) { @@ -6952,16 +7023,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, goto err_parse_key_end; } - if (timeout > 0) { - err = nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); + if (set->flags & NFT_SET_TIMEOUT) { + err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); if (err < 0) goto err_parse_key_end; - - if (timeout != READ_ONCE(set->timeout)) { - err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); - if (err < 0) - goto err_parse_key_end; - } } if (num_exprs) { @@ -7099,8 +7164,30 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF) && *nft_set_ext_obj(ext) != *nft_set_ext_obj(ext2))) goto err_element_clash; - else if (!(nlmsg_flags & NLM_F_EXCL)) + else if (!(nlmsg_flags & NLM_F_EXCL)) { err = 0; + if (nft_set_ext_exists(ext2, NFT_SET_EXT_TIMEOUT)) { + update_flags = 0; + if (timeout != nft_set_ext_timeout(ext2)->timeout) { + nft_trans_elem_timeout(trans) = timeout; + if (expiration == 0) + expiration = timeout; + + update_flags |= NFT_TRANS_UPD_TIMEOUT; + } + if (expiration) { + nft_trans_elem_expiration(trans) = expiration; + update_flags |= NFT_TRANS_UPD_EXPIRATION; + } + + if (update_flags) { + nft_trans_elem_priv(trans) = elem_priv; + nft_trans_elem_update_flags(trans) = update_flags; + nft_trans_commit_list_add_tail(ctx->net, trans); + goto err_elem_free; + } + } + } } else if (err == -ENOTEMPTY) { /* ENOTEMPTY reports overlapping between this element * and an existing one. @@ -7966,6 +8053,19 @@ cont: return skb->len; } +static int nf_tables_dumpreset_obj(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); + int ret; + + mutex_lock(&nft_net->commit_mutex); + ret = nf_tables_dump_obj(skb, cb); + mutex_unlock(&nft_net->commit_mutex); + + return ret; +} + static int nf_tables_dump_obj_start(struct netlink_callback *cb) { struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; @@ -7982,12 +8082,18 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb) if (nla[NFTA_OBJ_TYPE]) ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) - ctx->reset = true; - return 0; } +static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb) +{ + struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; + + ctx->reset = true; + + return nf_tables_dump_obj_start(cb); +} + static int nf_tables_dump_obj_done(struct netlink_callback *cb) { struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; @@ -7998,8 +8104,9 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb) } /* called with rcu_read_lock held */ -static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, - const struct nlattr * const nla[]) +static struct sk_buff * +nf_tables_getobj_single(u32 portid, const struct nfnl_info *info, + const struct nlattr * const nla[], bool reset) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); @@ -8008,72 +8115,109 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, struct net *net = info->net; struct nft_object *obj; struct sk_buff *skb2; - bool reset = false; u32 objtype; int err; - if (info->nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .start = nf_tables_dump_obj_start, - .dump = nf_tables_dump_obj, - .done = nf_tables_dump_obj_done, - .module = THIS_MODULE, - .data = (void *)nla, - }; - - return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); - } - if (!nla[NFTA_OBJ_NAME] || !nla[NFTA_OBJ_TYPE]) - return -EINVAL; + return ERR_PTR(-EINVAL); table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, 0); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]); - return PTR_ERR(table); + return ERR_CAST(table); } objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask); if (IS_ERR(obj)) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); - return PTR_ERR(obj); + return ERR_CAST(obj); } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) - return -ENOMEM; + return ERR_PTR(-ENOMEM); + + err = nf_tables_fill_obj_info(skb2, net, portid, + info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, + family, table, obj, reset); + if (err < 0) { + kfree_skb(skb2); + return ERR_PTR(err); + } - if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) - reset = true; + return skb2; +} - if (reset) { - const struct nftables_pernet *nft_net; - char *buf; +static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + u32 portid = NETLINK_CB(skb).portid; + struct sk_buff *skb2; - nft_net = nft_pernet(net); - buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq); + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start = nf_tables_dump_obj_start, + .dump = nf_tables_dump_obj, + .done = nf_tables_dump_obj_done, + .module = THIS_MODULE, + .data = (void *)nla, + }; - audit_log_nfcfg(buf, - family, - 1, - AUDIT_NFT_OP_OBJ_RESET, - GFP_ATOMIC); - kfree(buf); + return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } - err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid, - info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, - family, table, obj, reset); - if (err < 0) - goto err_fill_obj_info; + skb2 = nf_tables_getobj_single(portid, info, nla, false); + if (IS_ERR(skb2)) + return PTR_ERR(skb2); - return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); + return nfnetlink_unicast(skb2, info->net, portid); +} -err_fill_obj_info: - kfree_skb(skb2); - return err; +static int nf_tables_getobj_reset(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + struct nftables_pernet *nft_net = nft_pernet(info->net); + u32 portid = NETLINK_CB(skb).portid; + struct net *net = info->net; + struct sk_buff *skb2; + char *buf; + + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start = nf_tables_dumpreset_obj_start, + .dump = nf_tables_dumpreset_obj, + .done = nf_tables_dump_obj_done, + .module = THIS_MODULE, + .data = (void *)nla, + }; + + return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); + } + + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + rcu_read_unlock(); + mutex_lock(&nft_net->commit_mutex); + skb2 = nf_tables_getobj_single(portid, info, nla, true); + mutex_unlock(&nft_net->commit_mutex); + rcu_read_lock(); + module_put(THIS_MODULE); + + if (IS_ERR(skb2)) + return PTR_ERR(skb2); + + buf = kasprintf(GFP_ATOMIC, "%.*s:%u", + nla_len(nla[NFTA_OBJ_TABLE]), + (char *)nla_data(nla[NFTA_OBJ_TABLE]), + nft_net->base_seq); + audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1, + AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC); + kfree(buf); + + return nfnetlink_unicast(skb2, net, portid); } static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj) @@ -9356,7 +9500,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_obj_policy, }, [NFT_MSG_GETOBJ_RESET] = { - .call = nf_tables_getobj, + .call = nf_tables_getobj_reset, .type = NFNL_CB_RCU, .attr_count = NFTA_OBJ_MAX, .policy = nft_obj_policy, @@ -9418,51 +9562,53 @@ static int nf_tables_validate(struct net *net) * * We defer the drop policy until the transaction has been finalized. */ -static void nft_chain_commit_drop_policy(struct nft_trans *trans) +static void nft_chain_commit_drop_policy(struct nft_trans_chain *trans) { struct nft_base_chain *basechain; - if (nft_trans_chain_policy(trans) != NF_DROP) + if (trans->policy != NF_DROP) return; - if (!nft_is_base_chain(trans->ctx.chain)) + if (!nft_is_base_chain(trans->chain)) return; - basechain = nft_base_chain(trans->ctx.chain); + basechain = nft_base_chain(trans->chain); basechain->policy = NF_DROP; } -static void nft_chain_commit_update(struct nft_trans *trans) +static void nft_chain_commit_update(struct nft_trans_chain *trans) { + struct nft_table *table = trans->nft_trans_binding.nft_trans.table; struct nft_base_chain *basechain; - if (nft_trans_chain_name(trans)) { - rhltable_remove(&trans->ctx.table->chains_ht, - &trans->ctx.chain->rhlhead, + if (trans->name) { + rhltable_remove(&table->chains_ht, + &trans->chain->rhlhead, nft_chain_ht_params); - swap(trans->ctx.chain->name, nft_trans_chain_name(trans)); - rhltable_insert_key(&trans->ctx.table->chains_ht, - trans->ctx.chain->name, - &trans->ctx.chain->rhlhead, + swap(trans->chain->name, trans->name); + rhltable_insert_key(&table->chains_ht, + trans->chain->name, + &trans->chain->rhlhead, nft_chain_ht_params); } - if (!nft_is_base_chain(trans->ctx.chain)) + if (!nft_is_base_chain(trans->chain)) return; nft_chain_stats_replace(trans); - basechain = nft_base_chain(trans->ctx.chain); + basechain = nft_base_chain(trans->chain); - switch (nft_trans_chain_policy(trans)) { + switch (trans->policy) { case NF_DROP: case NF_ACCEPT: - basechain->policy = nft_trans_chain_policy(trans); + basechain->policy = trans->policy; break; } } -static void nft_obj_commit_update(struct nft_trans *trans) +static void nft_obj_commit_update(const struct nft_ctx *ctx, + struct nft_trans *trans) { struct nft_object *newobj; struct nft_object *obj; @@ -9474,15 +9620,21 @@ static void nft_obj_commit_update(struct nft_trans *trans) return; obj->ops->update(obj, newobj); - nft_obj_destroy(&trans->ctx, newobj); + nft_obj_destroy(ctx, newobj); } static void nft_commit_release(struct nft_trans *trans) { + struct nft_ctx ctx = { + .net = trans->net, + }; + + nft_ctx_update(&ctx, trans); + switch (trans->msg_type) { case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: - nf_tables_table_destroy(&trans->ctx); + nf_tables_table_destroy(trans->table); break; case NFT_MSG_NEWCHAIN: free_percpu(nft_trans_chain_stats(trans)); @@ -9493,25 +9645,25 @@ static void nft_commit_release(struct nft_trans *trans) if (nft_trans_chain_update(trans)) nft_hooks_destroy(&nft_trans_chain_hooks(trans)); else - nf_tables_chain_destroy(&trans->ctx); + nf_tables_chain_destroy(nft_trans_chain(trans)); break; case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: - nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + nf_tables_rule_destroy(&ctx, nft_trans_rule(trans)); break; case NFT_MSG_DELSET: case NFT_MSG_DESTROYSET: - nft_set_destroy(&trans->ctx, nft_trans_set(trans)); + nft_set_destroy(&ctx, nft_trans_set(trans)); break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: - nf_tables_set_elem_destroy(&trans->ctx, + nf_tables_set_elem_destroy(&ctx, nft_trans_elem_set(trans), nft_trans_elem_priv(trans)); break; case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: - nft_obj_destroy(&trans->ctx, nft_trans_obj(trans)); + nft_obj_destroy(&ctx, nft_trans_obj(trans)); break; case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: @@ -9523,7 +9675,7 @@ static void nft_commit_release(struct nft_trans *trans) } if (trans->put_net) - put_net(trans->ctx.net); + put_net(trans->net); kfree(trans); } @@ -9642,10 +9794,10 @@ static void nf_tables_commit_chain_prepare_cancel(struct net *net) struct nft_trans *trans, *next; list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { - struct nft_chain *chain = trans->ctx.chain; - if (trans->msg_type == NFT_MSG_NEWRULE || trans->msg_type == NFT_MSG_DELRULE) { + struct nft_chain *chain = nft_trans_rule_chain(trans); + kvfree(chain->blob_next); chain->blob_next = NULL; } @@ -10003,7 +10155,7 @@ static void nf_tables_commit_release(struct net *net) trans = list_last_entry(&nft_net->commit_list, struct nft_trans, list); - get_net(trans->ctx.net); + get_net(trans->net); WARN_ON_ONCE(trans->put_net); trans->put_net = true; @@ -10147,12 +10299,15 @@ static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq) static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nftables_pernet *nft_net = nft_pernet(net); + const struct nlmsghdr *nlh = nlmsg_hdr(skb); + struct nft_trans_binding *trans_binding; struct nft_trans *trans, *next; unsigned int base_seq, gc_seq; LIST_HEAD(set_update_list); struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; + struct nft_ctx ctx; LIST_HEAD(adl); int err; @@ -10161,7 +10316,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return 0; } - list_for_each_entry(trans, &nft_net->binding_list, binding_list) { + nft_ctx_init(&ctx, net, skb, nlh, NFPROTO_UNSPEC, NULL, NULL, NULL); + + list_for_each_entry(trans_binding, &nft_net->binding_list, binding_list) { + trans = &trans_binding->nft_trans; switch (trans->msg_type) { case NFT_MSG_NEWSET: if (!nft_trans_set_update(trans) && @@ -10179,6 +10337,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return -EINVAL; } break; + default: + WARN_ONCE(1, "Unhandled bind type %d", trans->msg_type); + break; } } @@ -10194,9 +10355,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) /* 1. Allocate space for next generation rules_gen_X[] */ list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { + struct nft_table *table = trans->table; int ret; - ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table); + ret = nf_tables_commit_audit_alloc(&adl, table); if (ret) { nf_tables_commit_chain_prepare_cancel(net); nf_tables_commit_audit_free(&adl); @@ -10204,7 +10366,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } if (trans->msg_type == NFT_MSG_NEWRULE || trans->msg_type == NFT_MSG_DELRULE) { - chain = trans->ctx.chain; + chain = nft_trans_rule_chain(trans); ret = nf_tables_commit_chain_prepare(net, chain); if (ret < 0) { @@ -10237,70 +10399,71 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) net->nft.gencursor = nft_gencursor_next(net); list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { - nf_tables_commit_audit_collect(&adl, trans->ctx.table, - trans->msg_type); + struct nft_table *table = trans->table; + + nft_ctx_update(&ctx, trans); + + nf_tables_commit_audit_collect(&adl, table, trans->msg_type); switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + if (!(table->flags & __NFT_TABLE_F_UPDATE)) { nft_trans_destroy(trans); break; } - if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT) - nf_tables_table_disable(net, trans->ctx.table); + if (table->flags & NFT_TABLE_F_DORMANT) + nf_tables_table_disable(net, table); - trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; + table->flags &= ~__NFT_TABLE_F_UPDATE; } else { - nft_clear(net, trans->ctx.table); + nft_clear(net, table); } - nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE); + nf_tables_table_notify(&ctx, NFT_MSG_NEWTABLE); nft_trans_destroy(trans); break; case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: - list_del_rcu(&trans->ctx.table->list); - nf_tables_table_notify(&trans->ctx, trans->msg_type); + list_del_rcu(&table->list); + nf_tables_table_notify(&ctx, trans->msg_type); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { - nft_chain_commit_update(trans); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN, + nft_chain_commit_update(nft_trans_container_chain(trans)); + nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, &nft_trans_chain_hooks(trans)); list_splice(&nft_trans_chain_hooks(trans), &nft_trans_basechain(trans)->hook_list); /* trans destroyed after rcu grace period */ } else { - nft_chain_commit_drop_policy(trans); - nft_clear(net, trans->ctx.chain); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN, NULL); + nft_chain_commit_drop_policy(nft_trans_container_chain(trans)); + nft_clear(net, nft_trans_chain(trans)); + nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, NULL); nft_trans_destroy(trans); } break; case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: if (nft_trans_chain_update(trans)) { - nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, + nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, &nft_trans_chain_hooks(trans)); - if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { + if (!(table->flags & NFT_TABLE_F_DORMANT)) { nft_netdev_unregister_hooks(net, &nft_trans_chain_hooks(trans), true); } } else { - nft_chain_del(trans->ctx.chain); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, + nft_chain_del(nft_trans_chain(trans)); + nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, NULL); - nf_tables_unregister_hook(trans->ctx.net, - trans->ctx.table, - trans->ctx.chain); + nf_tables_unregister_hook(ctx.net, ctx.table, + nft_trans_chain(trans)); } break; case NFT_MSG_NEWRULE: - nft_clear(trans->ctx.net, nft_trans_rule(trans)); - nf_tables_rule_notify(&trans->ctx, - nft_trans_rule(trans), + nft_clear(net, nft_trans_rule(trans)); + nf_tables_rule_notify(&ctx, nft_trans_rule(trans), NFT_MSG_NEWRULE); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); nft_trans_destroy(trans); @@ -10308,17 +10471,16 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: list_del_rcu(&nft_trans_rule(trans)->list); - nf_tables_rule_notify(&trans->ctx, - nft_trans_rule(trans), + nf_tables_rule_notify(&ctx, nft_trans_rule(trans), trans->msg_type); - nft_rule_expr_deactivate(&trans->ctx, - nft_trans_rule(trans), + nft_rule_expr_deactivate(&ctx, nft_trans_rule(trans), NFT_TRANS_COMMIT); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_NEWSET: + list_del(&nft_trans_container_set(trans)->list_trans_newset); if (nft_trans_set_update(trans)) { struct nft_set *set = nft_trans_set(trans); @@ -10334,9 +10496,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) */ if (nft_set_is_anonymous(nft_trans_set(trans)) && !list_empty(&nft_trans_set(trans)->bindings)) - nft_use_dec(&trans->ctx.table->use); + nft_use_dec(&table->use); } - nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + nf_tables_set_notify(&ctx, nft_trans_set(trans), NFT_MSG_NEWSET, GFP_KERNEL); nft_trans_destroy(trans); break; @@ -10344,14 +10506,29 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DESTROYSET: nft_trans_set(trans)->dead = 1; list_del_rcu(&nft_trans_set(trans)->list); - nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + nf_tables_set_notify(&ctx, nft_trans_set(trans), trans->msg_type, GFP_KERNEL); break; case NFT_MSG_NEWSETELEM: - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); + + if (te->update_flags) { + const struct nft_set_ext *ext = + nft_set_elem_ext(te->set, te->elem_priv); + + if (te->update_flags & NFT_TRANS_UPD_TIMEOUT) { + WRITE_ONCE(nft_set_ext_timeout(ext)->timeout, + te->timeout); + } + if (te->update_flags & NFT_TRANS_UPD_EXPIRATION) { + WRITE_ONCE(nft_set_ext_timeout(ext)->expiration, + get_jiffies_64() + te->expiration); + } + } else { + nft_setelem_activate(net, te->set, te->elem_priv); + } - nft_setelem_activate(net, te->set, te->elem_priv); - nf_tables_setelem_notify(&trans->ctx, te->set, + nf_tables_setelem_notify(&ctx, te->set, te->elem_priv, NFT_MSG_NEWSETELEM); if (te->set->ops->commit && @@ -10363,9 +10540,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); - nf_tables_setelem_notify(&trans->ctx, te->set, + nf_tables_setelem_notify(&ctx, te->set, te->elem_priv, trans->msg_type); nft_setelem_remove(net, te->set, te->elem_priv); @@ -10381,13 +10558,13 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { - nft_obj_commit_update(trans); - nf_tables_obj_notify(&trans->ctx, + nft_obj_commit_update(&ctx, trans); + nf_tables_obj_notify(&ctx, nft_trans_obj(trans), NFT_MSG_NEWOBJ); } else { nft_clear(net, nft_trans_obj(trans)); - nf_tables_obj_notify(&trans->ctx, + nf_tables_obj_notify(&ctx, nft_trans_obj(trans), NFT_MSG_NEWOBJ); nft_trans_destroy(trans); @@ -10396,14 +10573,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: nft_obj_del(nft_trans_obj(trans)); - nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), + nf_tables_obj_notify(&ctx, nft_trans_obj(trans), trans->msg_type); break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) { nft_trans_flowtable(trans)->data.flags = nft_trans_flowtable_flags(trans); - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), NFT_MSG_NEWFLOWTABLE); @@ -10411,7 +10588,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) &nft_trans_flowtable(trans)->hook_list); } else { nft_clear(net, nft_trans_flowtable(trans)); - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, NFT_MSG_NEWFLOWTABLE); @@ -10421,7 +10598,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: if (nft_trans_flowtable_update(trans)) { - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), trans->msg_type); @@ -10429,7 +10606,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) &nft_trans_flowtable_hooks(trans)); } else { list_del_rcu(&nft_trans_flowtable(trans)->list); - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, trans->msg_type); @@ -10471,28 +10648,32 @@ static void nf_tables_module_autoload(struct net *net) static void nf_tables_abort_release(struct nft_trans *trans) { + struct nft_ctx ctx = { }; + + nft_ctx_update(&ctx, trans); + switch (trans->msg_type) { case NFT_MSG_NEWTABLE: - nf_tables_table_destroy(&trans->ctx); + nf_tables_table_destroy(trans->table); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) nft_hooks_destroy(&nft_trans_chain_hooks(trans)); else - nf_tables_chain_destroy(&trans->ctx); + nf_tables_chain_destroy(nft_trans_chain(trans)); break; case NFT_MSG_NEWRULE: - nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + nf_tables_rule_destroy(&ctx, nft_trans_rule(trans)); break; case NFT_MSG_NEWSET: - nft_set_destroy(&trans->ctx, nft_trans_set(trans)); + nft_set_destroy(&ctx, nft_trans_set(trans)); break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), nft_trans_elem_priv(trans), true); break; case NFT_MSG_NEWOBJ: - nft_obj_destroy(&trans->ctx, nft_trans_obj(trans)); + nft_obj_destroy(&ctx, nft_trans_obj(trans)); break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) @@ -10524,6 +10705,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) struct nft_trans *trans, *next; LIST_HEAD(set_update_list); struct nft_trans_elem *te; + struct nft_ctx ctx = { + .net = net, + }; int err = 0; if (action == NFNL_ABORT_VALIDATE && @@ -10532,37 +10716,41 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { + struct nft_table *table = trans->table; + + nft_ctx_update(&ctx, trans); + switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + if (!(table->flags & __NFT_TABLE_F_UPDATE)) { nft_trans_destroy(trans); break; } - if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) { - nf_tables_table_disable(net, trans->ctx.table); - trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; - } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) { - trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT; + if (table->flags & __NFT_TABLE_F_WAS_DORMANT) { + nf_tables_table_disable(net, table); + table->flags |= NFT_TABLE_F_DORMANT; + } else if (table->flags & __NFT_TABLE_F_WAS_AWAKEN) { + table->flags &= ~NFT_TABLE_F_DORMANT; } - if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_ORPHAN) { - trans->ctx.table->flags &= ~NFT_TABLE_F_OWNER; - trans->ctx.table->nlpid = 0; + if (table->flags & __NFT_TABLE_F_WAS_ORPHAN) { + table->flags &= ~NFT_TABLE_F_OWNER; + table->nlpid = 0; } - trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; + table->flags &= ~__NFT_TABLE_F_UPDATE; nft_trans_destroy(trans); } else { - list_del_rcu(&trans->ctx.table->list); + list_del_rcu(&table->list); } break; case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: - nft_clear(trans->ctx.net, trans->ctx.table); + nft_clear(trans->net, table); nft_trans_destroy(trans); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { - if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { + if (!(table->flags & NFT_TABLE_F_DORMANT)) { nft_netdev_unregister_hooks(net, &nft_trans_chain_hooks(trans), true); @@ -10575,11 +10763,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - nft_use_dec_restore(&trans->ctx.table->use); - nft_chain_del(trans->ctx.chain); - nf_tables_unregister_hook(trans->ctx.net, - trans->ctx.table, - trans->ctx.chain); + nft_use_dec_restore(&table->use); + nft_chain_del(nft_trans_chain(trans)); + nf_tables_unregister_hook(trans->net, table, + nft_trans_chain(trans)); } break; case NFT_MSG_DELCHAIN: @@ -10588,8 +10775,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_splice(&nft_trans_chain_hooks(trans), &nft_trans_basechain(trans)->hook_list); } else { - nft_use_inc_restore(&trans->ctx.table->use); - nft_clear(trans->ctx.net, trans->ctx.chain); + nft_use_inc_restore(&table->use); + nft_clear(trans->net, nft_trans_chain(trans)); } nft_trans_destroy(trans); break; @@ -10598,30 +10785,31 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - nft_use_dec_restore(&trans->ctx.chain->use); + nft_use_dec_restore(&nft_trans_rule_chain(trans)->use); list_del_rcu(&nft_trans_rule(trans)->list); - nft_rule_expr_deactivate(&trans->ctx, + nft_rule_expr_deactivate(&ctx, nft_trans_rule(trans), NFT_TRANS_ABORT); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: - nft_use_inc_restore(&trans->ctx.chain->use); - nft_clear(trans->ctx.net, nft_trans_rule(trans)); - nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + nft_use_inc_restore(&nft_trans_rule_chain(trans)->use); + nft_clear(trans->net, nft_trans_rule(trans)); + nft_rule_expr_activate(&ctx, nft_trans_rule(trans)); + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: + list_del(&nft_trans_container_set(trans)->list_trans_newset); if (nft_trans_set_update(trans)) { nft_trans_destroy(trans); break; } - nft_use_dec_restore(&trans->ctx.table->use); + nft_use_dec_restore(&table->use); if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); break; @@ -10631,20 +10819,24 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_DELSET: case NFT_MSG_DESTROYSET: - nft_use_inc_restore(&trans->ctx.table->use); - nft_clear(trans->ctx.net, nft_trans_set(trans)); + nft_use_inc_restore(&table->use); + nft_clear(trans->net, nft_trans_set(trans)); if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) - nft_map_activate(&trans->ctx, nft_trans_set(trans)); + nft_map_activate(&ctx, nft_trans_set(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: - if (nft_trans_elem_set_bound(trans)) { + if (nft_trans_elem_update_flags(trans) || + nft_trans_elem_set_bound(trans)) { nft_trans_destroy(trans); break; } - te = (struct nft_trans_elem *)trans->data; - nft_setelem_remove(net, te->set, te->elem_priv); + te = nft_trans_container_elem(trans); + if (!te->set->ops->abort || + nft_setelem_is_catchall(te->set, te->elem_priv)) + nft_setelem_remove(net, te->set, te->elem_priv); + if (!nft_setelem_is_catchall(te->set, te->elem_priv)) atomic_dec(&te->set->nelems); @@ -10656,7 +10848,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); if (!nft_setelem_active_next(net, te->set, te->elem_priv)) { nft_setelem_data_activate(net, te->set, te->elem_priv); @@ -10674,17 +10866,17 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { - nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans)); + nft_obj_destroy(&ctx, nft_trans_obj_newobj(trans)); nft_trans_destroy(trans); } else { - nft_use_dec_restore(&trans->ctx.table->use); + nft_use_dec_restore(&table->use); nft_obj_del(nft_trans_obj(trans)); } break; case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: - nft_use_inc_restore(&trans->ctx.table->use); - nft_clear(trans->ctx.net, nft_trans_obj(trans)); + nft_use_inc_restore(&table->use); + nft_clear(trans->net, nft_trans_obj(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWFLOWTABLE: @@ -10692,7 +10884,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_unregister_flowtable_net_hooks(net, &nft_trans_flowtable_hooks(trans)); } else { - nft_use_dec_restore(&trans->ctx.table->use); + nft_use_dec_restore(&table->use); list_del_rcu(&nft_trans_flowtable(trans)->list); nft_unregister_flowtable_net_hooks(net, &nft_trans_flowtable(trans)->hook_list); @@ -10704,14 +10896,16 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_splice(&nft_trans_flowtable_hooks(trans), &nft_trans_flowtable(trans)->hook_list); } else { - nft_use_inc_restore(&trans->ctx.table->use); - nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); + nft_use_inc_restore(&table->use); + nft_clear(trans->net, nft_trans_flowtable(trans)); } nft_trans_destroy(trans); break; } } + WARN_ON_ONCE(!list_empty(&nft_net->commit_set_list)); + nft_set_abort_update(&set_update_list); synchronize_rcu(); @@ -10810,150 +11004,6 @@ int nft_chain_validate_hooks(const struct nft_chain *chain, } EXPORT_SYMBOL_GPL(nft_chain_validate_hooks); -/* - * Loop detection - walk through the ruleset beginning at the destination chain - * of a new jump until either the source chain is reached (loop) or all - * reachable chains have been traversed. - * - * The loop check is performed whenever a new jump verdict is added to an - * expression or verdict map or a verdict map is bound to a new chain. - */ - -static int nf_tables_check_loops(const struct nft_ctx *ctx, - const struct nft_chain *chain); - -static int nft_check_loops(const struct nft_ctx *ctx, - const struct nft_set_ext *ext) -{ - const struct nft_data *data; - int ret; - - data = nft_set_ext_data(ext); - switch (data->verdict.code) { - case NFT_JUMP: - case NFT_GOTO: - ret = nf_tables_check_loops(ctx, data->verdict.chain); - break; - default: - ret = 0; - break; - } - - return ret; -} - -static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, - struct nft_set *set, - const struct nft_set_iter *iter, - struct nft_elem_priv *elem_priv) -{ - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); - - if (!nft_set_elem_active(ext, iter->genmask)) - return 0; - - if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && - *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) - return 0; - - return nft_check_loops(ctx, ext); -} - -static int nft_set_catchall_loops(const struct nft_ctx *ctx, - struct nft_set *set) -{ - u8 genmask = nft_genmask_next(ctx->net); - struct nft_set_elem_catchall *catchall; - struct nft_set_ext *ext; - int ret = 0; - - list_for_each_entry_rcu(catchall, &set->catchall_list, list) { - ext = nft_set_elem_ext(set, catchall->elem); - if (!nft_set_elem_active(ext, genmask)) - continue; - - ret = nft_check_loops(ctx, ext); - if (ret < 0) - return ret; - } - - return ret; -} - -static int nf_tables_check_loops(const struct nft_ctx *ctx, - const struct nft_chain *chain) -{ - const struct nft_rule *rule; - const struct nft_expr *expr, *last; - struct nft_set *set; - struct nft_set_binding *binding; - struct nft_set_iter iter; - - if (ctx->chain == chain) - return -ELOOP; - - if (fatal_signal_pending(current)) - return -EINTR; - - list_for_each_entry(rule, &chain->rules, list) { - nft_rule_for_each_expr(expr, last, rule) { - struct nft_immediate_expr *priv; - const struct nft_data *data; - int err; - - if (strcmp(expr->ops->type->name, "immediate")) - continue; - - priv = nft_expr_priv(expr); - if (priv->dreg != NFT_REG_VERDICT) - continue; - - data = &priv->data; - switch (data->verdict.code) { - case NFT_JUMP: - case NFT_GOTO: - err = nf_tables_check_loops(ctx, - data->verdict.chain); - if (err < 0) - return err; - break; - default: - break; - } - } - } - - list_for_each_entry(set, &ctx->table->sets, list) { - if (!nft_is_active_next(ctx->net, set)) - continue; - if (!(set->flags & NFT_SET_MAP) || - set->dtype != NFT_DATA_VERDICT) - continue; - - list_for_each_entry(binding, &set->bindings, list) { - if (!(binding->flags & NFT_SET_MAP) || - binding->chain != chain) - continue; - - iter.genmask = nft_genmask_next(ctx->net); - iter.type = NFT_ITER_UPDATE; - iter.skip = 0; - iter.count = 0; - iter.err = 0; - iter.fn = nf_tables_loop_check_setelem; - - set->ops->walk(ctx, set, &iter); - if (!iter.err) - iter.err = nft_set_catchall_loops(ctx, set); - - if (iter.err < 0) - return iter.err; - } - } - - return 0; -} - /** * nft_parse_u32_check - fetch u32 attribute and check for maximum value * @@ -11032,10 +11082,11 @@ static int nft_validate_register_load(enum nft_registers reg, unsigned int len) return 0; } -int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len) +int nft_parse_register_load(const struct nft_ctx *ctx, + const struct nlattr *attr, u8 *sreg, u32 len) { - u32 reg; - int err; + int err, invalid_reg; + u32 reg, next_register; err = nft_parse_register(attr, ®); if (err < 0) @@ -11045,11 +11096,36 @@ int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len) if (err < 0) return err; + next_register = DIV_ROUND_UP(len, NFT_REG32_SIZE) + reg; + + /* Can't happen: nft_validate_register_load() should have failed */ + if (WARN_ON_ONCE(next_register > NFT_REG32_NUM)) + return -EINVAL; + + /* find first register that did not see an earlier store. */ + invalid_reg = find_next_zero_bit(ctx->reg_inited, NFT_REG32_NUM, reg); + + /* invalid register within the range that we're loading from? */ + if (invalid_reg < next_register) + return -ENODATA; + *sreg = reg; return 0; } EXPORT_SYMBOL_GPL(nft_parse_register_load); +static void nft_saw_register_store(const struct nft_ctx *__ctx, + int reg, unsigned int len) +{ + unsigned int registers = DIV_ROUND_UP(len, NFT_REG32_SIZE); + struct nft_ctx *ctx = (struct nft_ctx *)__ctx; + + if (WARN_ON_ONCE(len == 0 || reg < 0)) + return; + + bitmap_set(ctx->reg_inited, reg, registers); +} + static int nft_validate_register_store(const struct nft_ctx *ctx, enum nft_registers reg, const struct nft_data *data, @@ -11066,13 +11142,16 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, if (data != NULL && (data->verdict.code == NFT_GOTO || data->verdict.code == NFT_JUMP)) { - err = nf_tables_check_loops(ctx, data->verdict.chain); + err = nft_chain_validate(ctx, data->verdict.chain); if (err < 0) return err; } - return 0; + break; default: + if (type != NFT_DATA_VALUE) + return -EINVAL; + if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE) return -EINVAL; if (len == 0) @@ -11081,10 +11160,11 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, sizeof_field(struct nft_regs, data)) return -ERANGE; - if (data != NULL && type != NFT_DATA_VALUE) - return -EINVAL; - return 0; + break; } + + nft_saw_register_store(ctx, reg, len); + return 0; } int nft_parse_register_store(const struct nft_ctx *ctx, @@ -11365,7 +11445,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) } nft_chain_del(ctx->chain); nft_use_dec(&ctx->table->use); - nf_tables_chain_destroy(ctx); + nf_tables_chain_destroy(ctx->chain); return 0; } @@ -11440,12 +11520,11 @@ static void __nft_release_table(struct net *net, struct nft_table *table) nft_obj_destroy(&ctx, obj); } list_for_each_entry_safe(chain, nc, &table->chains, list) { - ctx.chain = chain; nft_chain_del(chain); nft_use_dec(&table->use); - nf_tables_chain_destroy(&ctx); + nf_tables_chain_destroy(chain); } - nf_tables_table_destroy(&ctx); + nf_tables_table_destroy(table); } static void __nft_release_tables(struct net *net) @@ -11483,8 +11562,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, gc_seq = nft_gc_seq_begin(nft_net); - if (!list_empty(&nf_tables_destroy_list)) - nf_tables_trans_destroy_flush_work(); + nf_tables_trans_destroy_flush_work(); again: list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_has_owner(table) && @@ -11526,6 +11604,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&nft_net->tables); INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->commit_set_list); INIT_LIST_HEAD(&nft_net->binding_list); INIT_LIST_HEAD(&nft_net->module_list); INIT_LIST_HEAD(&nft_net->notify_list); @@ -11556,6 +11635,7 @@ static void __net_exit nf_tables_exit_net(struct net *net) gc_seq = nft_gc_seq_begin(nft_net); WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + WARN_ON_ONCE(!list_empty(&nft_net->commit_set_list)); if (!list_empty(&nft_net->module_list)) nf_tables_module_autoload_cleanup(net); @@ -11588,6 +11668,14 @@ static int __init nf_tables_module_init(void) { int err; + BUILD_BUG_ON(offsetof(struct nft_trans_table, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_chain, nft_trans_binding.nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_rule, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_set, nft_trans_binding.nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_elem, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_obj, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_flowtable, nft_trans) != 0); + err = register_pernet_subsys(&nf_tables_net_ops); if (err < 0) return err; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index a48d5f0e2f3e..75598520b0fa 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -256,7 +256,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) const struct net *net = nft_net(pkt); const struct nft_expr *expr, *last; const struct nft_rule_dp *rule; - struct nft_regs regs = {}; + struct nft_regs regs; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; bool genbit = READ_ONCE(net->nft.gencursor); diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 12ab78fa5d84..64675f1c7f29 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -513,38 +513,38 @@ static void nft_flow_rule_offload_abort(struct net *net, int err = 0; list_for_each_entry_continue_reverse(trans, &nft_net->commit_list, list) { - if (trans->ctx.family != NFPROTO_NETDEV) + if (trans->table->family != NFPROTO_NETDEV) continue; switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) || + if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) || nft_trans_chain_update(trans)) continue; - err = nft_flow_offload_chain(trans->ctx.chain, NULL, + err = nft_flow_offload_chain(nft_trans_chain(trans), NULL, FLOW_BLOCK_UNBIND); break; case NFT_MSG_DELCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_chain(trans->ctx.chain, NULL, + err = nft_flow_offload_chain(nft_trans_chain(trans), NULL, FLOW_BLOCK_BIND); break; case NFT_MSG_NEWRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), NULL, FLOW_CLS_DESTROY); break; case NFT_MSG_DELRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), nft_trans_flow_rule(trans), FLOW_CLS_REPLACE); @@ -564,46 +564,46 @@ int nft_flow_rule_offload_commit(struct net *net) u8 policy; list_for_each_entry(trans, &nft_net->commit_list, list) { - if (trans->ctx.family != NFPROTO_NETDEV) + if (trans->table->family != NFPROTO_NETDEV) continue; switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) || + if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) || nft_trans_chain_update(trans)) continue; policy = nft_trans_chain_policy(trans); - err = nft_flow_offload_chain(trans->ctx.chain, &policy, + err = nft_flow_offload_chain(nft_trans_chain(trans), &policy, FLOW_BLOCK_BIND); break; case NFT_MSG_DELCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; policy = nft_trans_chain_policy(trans); - err = nft_flow_offload_chain(trans->ctx.chain, &policy, + err = nft_flow_offload_chain(nft_trans_chain(trans), &policy, FLOW_BLOCK_UNBIND); break; case NFT_MSG_NEWRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - if (trans->ctx.flags & NLM_F_REPLACE || - !(trans->ctx.flags & NLM_F_APPEND)) { + if (trans->flags & NLM_F_REPLACE || + !(trans->flags & NLM_F_APPEND)) { err = -EOPNOTSUPP; break; } - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), nft_trans_flow_rule(trans), FLOW_CLS_REPLACE); break; case NFT_MSG_DELRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), NULL, FLOW_CLS_DESTROY); break; diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index a83637e3f455..580c55268f65 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -317,7 +317,7 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, net_get_random_once(&trace_key, sizeof(trace_key)); info->skbid = (u32)siphash_3u32(hash32_ptr(skb), - skb_get_hash(skb), + skb_get_hash_net(nft_net(pkt), skb), skb->skb_iif, &trace_key); } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 4abf660c7baf..7784ec094097 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -402,33 +402,35 @@ replay_abort: { nfnl_unlock(subsys_id); netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); - return kfree_skb(skb); + return consume_skb(skb); } } if (!ss->valid_genid || !ss->commit || !ss->abort) { nfnl_unlock(subsys_id); netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); - return kfree_skb(skb); + return consume_skb(skb); } if (!try_module_get(ss->owner)) { nfnl_unlock(subsys_id); netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); - return kfree_skb(skb); + return consume_skb(skb); } if (!ss->valid_genid(net, genid)) { module_put(ss->owner); nfnl_unlock(subsys_id); netlink_ack(oskb, nlh, -ERESTART, NULL); - return kfree_skb(skb); + return consume_skb(skb); } nfnl_unlock(subsys_id); - if (nlh->nlmsg_flags & NLM_F_ACK) + if (nlh->nlmsg_flags & NLM_F_ACK) { + memset(&extack, 0, sizeof(extack)); nfnl_err_add(&err_list, nlh, 0, &extack); + } while (skb->len >= nlmsg_total_size(0)) { int msglen, type; @@ -565,7 +567,7 @@ done: if (status & NFNL_BATCH_REPLAY) { ss->abort(net, oskb, NFNL_ABORT_AUTOLOAD); nfnl_err_reset(&err_list); - kfree_skb(skb); + consume_skb(skb); module_put(ss->owner); goto replay; } else if (status == NFNL_BATCH_DONE) { @@ -577,6 +579,7 @@ done: ss->abort(net, oskb, NFNL_ABORT_NONE); netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL); } else if (nlh->nlmsg_flags & NLM_F_ACK) { + memset(&extack, 0, sizeof(extack)); nfnl_err_add(&err_list, nlh, 0, &extack); } } else { @@ -590,7 +593,7 @@ done: err = ss->abort(net, oskb, abort_action); if (err == -EAGAIN) { nfnl_err_reset(&err_list); - kfree_skb(skb); + consume_skb(skb); module_put(ss->owner); status |= NFNL_BATCH_FAILURE; goto replay_abort; @@ -598,7 +601,7 @@ done: } nfnl_err_deliver(&err_list, oskb); - kfree_skb(skb); + consume_skb(skb); module_put(ss->owner); } diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index f466af4f8531..eab4f476b47f 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -366,8 +366,7 @@ static int cttimeout_default_set(struct sk_buff *skb, __u8 l4num; int ret; - if (!cda[CTA_TIMEOUT_L3PROTO] || - !cda[CTA_TIMEOUT_L4PROTO] || + if (!cda[CTA_TIMEOUT_L4PROTO] || !cda[CTA_TIMEOUT_DATA]) return -EINVAL; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index f1c31757e496..d2773ce9b585 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -325,7 +325,7 @@ static void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) hooks = nf_hook_entries_head(net, pf, entry->state.hook); i = entry->hook_index; - if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) { + if (!hooks || i >= hooks->num_hook_entries) { kfree_skb_reason(skb, SKB_DROP_REASON_NETFILTER_DROP); nf_queue_entry_free(entry); return; @@ -540,6 +540,14 @@ nla_put_failure: return -1; } +static int nf_queue_checksum_help(struct sk_buff *entskb) +{ + if (skb_csum_is_sctp(entskb)) + return skb_crc32c_csum_help(entskb); + + return skb_checksum_help(entskb); +} + static struct sk_buff * nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry, @@ -602,7 +610,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, case NFQNL_COPY_PACKET: if (!(queue->flags & NFQA_CFG_F_GSO) && entskb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(entskb)) + nf_queue_checksum_help(entskb)) return NULL; data_len = READ_ONCE(queue->copy_range); @@ -820,10 +828,41 @@ static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) static const unsigned long flags = IPS_CONFIRMED | IPS_DYING; - const struct nf_conn *ct = (void *)skb_nfct(entry->skb); + struct nf_conn *ct = (void *)skb_nfct(entry->skb); + unsigned long status; + unsigned int use; + + if (!ct) + return false; - if (ct && ((ct->status & flags) == IPS_DYING)) + status = READ_ONCE(ct->status); + if ((status & flags) == IPS_DYING) return true; + + if (status & IPS_CONFIRMED) + return false; + + /* in some cases skb_clone() can occur after initial conntrack + * pickup, but conntrack assumes exclusive skb->_nfct ownership for + * unconfirmed entries. + * + * This happens for br_netfilter and with ip multicast routing. + * We can't be solved with serialization here because one clone could + * have been queued for local delivery. + */ + use = refcount_read(&ct->ct_general.use); + if (likely(use == 1)) + return false; + + /* Can't decrement further? Exclusive ownership. */ + if (!refcount_dec_not_one(&ct->ct_general.use)) + return false; + + skb_set_nfct(entry->skb, 0); + /* No nf_ct_put(): we already decremented .use and it cannot + * drop down to 0. + */ + return true; #endif return false; } @@ -983,7 +1022,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) break; } - if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(skb)) + if (!skb_is_gso(skb) || ((queue->flags & NFQA_CFG_F_GSO) && !skb_is_gso_sctp(skb))) return __nfqnl_enqueue_packet(net, queue, entry); nf_bridge_adjust_skb_data(skb); diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index ca857afbf061..7de95674fd8c 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -171,7 +171,7 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, priv->len = len; - err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg, + err = nft_parse_register_load(ctx, tb[NFTA_BITWISE_SREG], &priv->sreg, priv->len); if (err < 0) return err; @@ -365,7 +365,7 @@ static int nft_bitwise_fast_init(const struct nft_ctx *ctx, struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); int err; - err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg, + err = nft_parse_register_load(ctx, tb[NFTA_BITWISE_SREG], &priv->sreg, sizeof(u32)); if (err < 0) return err; diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index f6e791a68101..2f82a444d21b 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -139,7 +139,7 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, priv->len = len; - err = nft_parse_register_load(tb[NFTA_BYTEORDER_SREG], &priv->sreg, + err = nft_parse_register_load(ctx, tb[NFTA_BYTEORDER_SREG], &priv->sreg, priv->len); if (err < 0) return err; diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index cd4652259095..2605f43737bc 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -83,7 +83,7 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (err < 0) return err; - err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); + err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; @@ -222,7 +222,7 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, if (err < 0) return err; - err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); + err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; @@ -323,7 +323,7 @@ static int nft_cmp16_fast_init(const struct nft_ctx *ctx, if (err < 0) return err; - err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); + err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index d3d11dede545..52cdfee17f73 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -350,8 +350,7 @@ nla_put_failure: } static int nft_target_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) + const struct nft_expr *expr) { struct xt_target *target = expr->ops->data; unsigned int hook_mask = 0; @@ -611,8 +610,7 @@ static int nft_match_large_dump(struct sk_buff *skb, } static int nft_match_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) + const struct nft_expr *expr) { struct xt_match *match = expr->ops->data; unsigned int hook_mask = 0; diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 291ed2026367..cc7325329496 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -8,7 +8,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> -#include <linux/seqlock.h> +#include <linux/u64_stats_sync.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> @@ -17,6 +17,11 @@ #include <net/netfilter/nf_tables_offload.h> struct nft_counter { + u64_stats_t bytes; + u64_stats_t packets; +}; + +struct nft_counter_tot { s64 bytes; s64 packets; }; @@ -25,25 +30,24 @@ struct nft_counter_percpu_priv { struct nft_counter __percpu *counter; }; -static DEFINE_PER_CPU(seqcount_t, nft_counter_seq); +static DEFINE_PER_CPU(struct u64_stats_sync, nft_counter_sync); static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt) { + struct u64_stats_sync *nft_sync; struct nft_counter *this_cpu; - seqcount_t *myseq; local_bh_disable(); this_cpu = this_cpu_ptr(priv->counter); - myseq = this_cpu_ptr(&nft_counter_seq); - - write_seqcount_begin(myseq); + nft_sync = this_cpu_ptr(&nft_counter_sync); - this_cpu->bytes += pkt->skb->len; - this_cpu->packets++; + u64_stats_update_begin(nft_sync); + u64_stats_add(&this_cpu->bytes, pkt->skb->len); + u64_stats_inc(&this_cpu->packets); + u64_stats_update_end(nft_sync); - write_seqcount_end(myseq); local_bh_enable(); } @@ -66,17 +70,16 @@ static int nft_counter_do_init(const struct nlattr * const tb[], if (cpu_stats == NULL) return -ENOMEM; - preempt_disable(); - this_cpu = this_cpu_ptr(cpu_stats); + this_cpu = raw_cpu_ptr(cpu_stats); if (tb[NFTA_COUNTER_PACKETS]) { - this_cpu->packets = - be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + u64_stats_set(&this_cpu->packets, + be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]))); } if (tb[NFTA_COUNTER_BYTES]) { - this_cpu->bytes = - be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); + u64_stats_set(&this_cpu->bytes, + be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]))); } - preempt_enable(); + priv->counter = cpu_stats; return 0; } @@ -104,35 +107,41 @@ static void nft_counter_obj_destroy(const struct nft_ctx *ctx, } static void nft_counter_reset(struct nft_counter_percpu_priv *priv, - struct nft_counter *total) + struct nft_counter_tot *total) { + struct u64_stats_sync *nft_sync; struct nft_counter *this_cpu; local_bh_disable(); this_cpu = this_cpu_ptr(priv->counter); - this_cpu->packets -= total->packets; - this_cpu->bytes -= total->bytes; + nft_sync = this_cpu_ptr(&nft_counter_sync); + + u64_stats_update_begin(nft_sync); + u64_stats_add(&this_cpu->packets, -total->packets); + u64_stats_add(&this_cpu->bytes, -total->bytes); + u64_stats_update_end(nft_sync); + local_bh_enable(); } static void nft_counter_fetch(struct nft_counter_percpu_priv *priv, - struct nft_counter *total) + struct nft_counter_tot *total) { struct nft_counter *this_cpu; - const seqcount_t *myseq; u64 bytes, packets; unsigned int seq; int cpu; memset(total, 0, sizeof(*total)); for_each_possible_cpu(cpu) { - myseq = per_cpu_ptr(&nft_counter_seq, cpu); + struct u64_stats_sync *nft_sync = per_cpu_ptr(&nft_counter_sync, cpu); + this_cpu = per_cpu_ptr(priv->counter, cpu); do { - seq = read_seqcount_begin(myseq); - bytes = this_cpu->bytes; - packets = this_cpu->packets; - } while (read_seqcount_retry(myseq, seq)); + seq = u64_stats_fetch_begin(nft_sync); + bytes = u64_stats_read(&this_cpu->bytes); + packets = u64_stats_read(&this_cpu->packets); + } while (u64_stats_fetch_retry(nft_sync, seq)); total->bytes += bytes; total->packets += packets; @@ -143,7 +152,7 @@ static int nft_counter_do_dump(struct sk_buff *skb, struct nft_counter_percpu_priv *priv, bool reset) { - struct nft_counter total; + struct nft_counter_tot total; nft_counter_fetch(priv, &total); @@ -232,7 +241,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, g struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst); struct nft_counter __percpu *cpu_stats; struct nft_counter *this_cpu; - struct nft_counter total; + struct nft_counter_tot total; nft_counter_fetch(priv, &total); @@ -240,11 +249,9 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, g if (cpu_stats == NULL) return -ENOMEM; - preempt_disable(); - this_cpu = this_cpu_ptr(cpu_stats); - this_cpu->packets = total.packets; - this_cpu->bytes = total.bytes; - preempt_enable(); + this_cpu = raw_cpu_ptr(cpu_stats); + u64_stats_set(&this_cpu->packets, total.packets); + u64_stats_set(&this_cpu->bytes, total.bytes); priv_clone->counter = cpu_stats; return 0; @@ -262,18 +269,18 @@ static void nft_counter_offload_stats(struct nft_expr *expr, const struct flow_stats *stats) { struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); + struct u64_stats_sync *nft_sync; struct nft_counter *this_cpu; - seqcount_t *myseq; - preempt_disable(); + local_bh_disable(); this_cpu = this_cpu_ptr(priv->counter); - myseq = this_cpu_ptr(&nft_counter_seq); + nft_sync = this_cpu_ptr(&nft_counter_sync); - write_seqcount_begin(myseq); - this_cpu->packets += stats->pkts; - this_cpu->bytes += stats->bytes; - write_seqcount_end(myseq); - preempt_enable(); + u64_stats_update_begin(nft_sync); + u64_stats_add(&this_cpu->packets, stats->pkts); + u64_stats_add(&this_cpu->bytes, stats->bytes); + u64_stats_update_end(nft_sync); + local_bh_enable(); } void nft_counter_init_seqcount(void) @@ -281,7 +288,7 @@ void nft_counter_init_seqcount(void) int cpu; for_each_possible_cpu(cpu) - seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu)); + u64_stats_init(per_cpu_ptr(&nft_counter_sync, cpu)); } struct nft_expr_type nft_counter_type; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 452ed94c3a4d..67a41cd2baaf 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -606,7 +606,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, } priv->len = len; - err = nft_parse_register_load(tb[NFTA_CT_SREG], &priv->sreg, len); + err = nft_parse_register_load(ctx, tb[NFTA_CT_SREG], &priv->sreg, len); if (err < 0) goto err1; diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index e5739a59ebf1..0573f96ce079 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -40,7 +40,7 @@ static int nft_dup_netdev_init(const struct nft_ctx *ctx, if (tb[NFTA_DUP_SREG_DEV] == NULL) return -EINVAL; - return nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev, + return nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev, sizeof(int)); } diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index b4ada3ab2167..88922e0e8e83 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -56,7 +56,7 @@ static struct nft_elem_priv *nft_dynset_new(struct nft_set *set, if (!atomic_add_unless(&set->nelems, 1, set->size)) return NULL; - timeout = priv->timeout ? : set->timeout; + timeout = priv->timeout ? : READ_ONCE(set->timeout); elem_priv = nft_set_elem_init(set, &priv->tmpl, ®s->data[priv->sreg_key], NULL, ®s->data[priv->sreg_data], @@ -94,9 +94,10 @@ void nft_dynset_eval(const struct nft_expr *expr, if (set->ops->update(set, ®s->data[priv->sreg_key], nft_dynset_new, expr, regs, &ext)) { if (priv->op == NFT_DYNSET_OP_UPDATE && - nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { - timeout = priv->timeout ? : set->timeout; - *nft_set_ext_expiration(ext) = get_jiffies_64() + timeout; + nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && + READ_ONCE(nft_set_ext_timeout(ext)->timeout) != 0) { + timeout = priv->timeout ? : READ_ONCE(set->timeout); + WRITE_ONCE(nft_set_ext_timeout(ext)->expiration, get_jiffies_64() + timeout); } nft_set_elem_update_expr(ext, regs, pkt); @@ -215,7 +216,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, return err; } - err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key, + err = nft_parse_register_load(ctx, tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key, set->klen); if (err < 0) return err; @@ -226,7 +227,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (set->dtype == NFT_DATA_VERDICT) return -EOPNOTSUPP; - err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_DATA], + err = nft_parse_register_load(ctx, tb[NFTA_DYNSET_SREG_DATA], &priv->sreg_data, set->dlen); if (err < 0) return err; @@ -312,12 +313,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (priv->num_exprs) nft_dynset_ext_add_expr(priv); - if (set->flags & NFT_SET_TIMEOUT) { - if (timeout || set->timeout) { - nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_TIMEOUT); - nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION); - } - } + if (set->flags & NFT_SET_TIMEOUT && + (timeout || READ_ONCE(set->timeout))) + nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_TIMEOUT); priv->timeout = timeout; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 6eb571d0c3fd..6bfd33516241 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -588,7 +588,7 @@ static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx, priv->flags = flags; priv->op = op; - return nft_parse_register_load(tb[NFTA_EXTHDR_SREG], &priv->sreg, + return nft_parse_register_load(ctx, tb[NFTA_EXTHDR_SREG], &priv->sreg, priv->len); } diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index b58f62195ff3..96e02a83c045 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -26,8 +26,7 @@ const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = { }; EXPORT_SYMBOL(nft_fib_policy); -int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nft_data **data) +int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_fib *priv = nft_expr_priv(expr); unsigned int hooks; diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index ab9576098701..2f732fae5a83 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -9,6 +9,7 @@ #include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_tables.h> #include <net/ip.h> /* for ipv4 options. */ +#include <net/inet_dscp.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_conntrack_core.h> @@ -235,7 +236,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip; fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex; fl.u.ip4.flowi4_iif = this_dst->dev->ifindex; - fl.u.ip4.flowi4_tos = RT_TOS(ip_hdr(pkt->skb)->tos); + fl.u.ip4.flowi4_tos = ip_hdr(pkt->skb)->tos & INET_DSCP_MASK; fl.u.ip4.flowi4_mark = pkt->skb->mark; fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC; break; @@ -380,8 +381,7 @@ out: } static int nft_flow_offload_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) + const struct nft_expr *expr) { unsigned int hook_mask = (1 << NF_INET_FORWARD); diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 358e742afad7..152a9fb4d23a 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -52,7 +52,7 @@ static int nft_fwd_netdev_init(const struct nft_ctx *ctx, if (tb[NFTA_FWD_SREG_DEV] == NULL) return -EINVAL; - return nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, + return nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, sizeof(int)); } @@ -178,12 +178,12 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - err = nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, + err = nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, sizeof(int)); if (err < 0) return err; - return nft_parse_register_load(tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr, + return nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr, addr_len); } @@ -204,8 +204,7 @@ nla_put_failure: } static int nft_fwd_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) + const struct nft_expr *expr) { return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS) | (1 << NF_NETDEV_EGRESS)); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 92d47e469204..5d034bbb6913 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -51,7 +51,8 @@ static void nft_symhash_eval(const struct nft_expr *expr, struct sk_buff *skb = pkt->skb; u32 h; - h = reciprocal_scale(__skb_get_hash_symmetric(skb), priv->modulus); + h = reciprocal_scale(__skb_get_hash_symmetric_net(nft_net(pkt), skb), + priv->modulus); regs->data[priv->dreg] = h + priv->offset; } @@ -91,7 +92,7 @@ static int nft_jhash_init(const struct nft_ctx *ctx, priv->len = len; - err = nft_parse_register_load(tb[NFTA_HASH_SREG], &priv->sreg, len); + err = nft_parse_register_load(ctx, tb[NFTA_HASH_SREG], &priv->sreg, len); if (err < 0) return err; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 6475c7abc1fe..02ee5fb69871 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -221,7 +221,7 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, list_del(&rule->list); nf_tables_rule_destroy(&chain_ctx, rule); } - nf_tables_chain_destroy(&chain_ctx); + nf_tables_chain_destroy(chain); break; default: break; @@ -244,8 +244,7 @@ nla_put_failure: } static int nft_immediate_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **d) + const struct nft_expr *expr) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); struct nft_ctx *pctx = (struct nft_ctx *)ctx; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index b314ca728a29..63ef832b8aa7 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -113,7 +113,7 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (IS_ERR(set)) return PTR_ERR(set); - err = nft_parse_register_load(tb[NFTA_LOOKUP_SREG], &priv->sreg, + err = nft_parse_register_load(ctx, tb[NFTA_LOOKUP_SREG], &priv->sreg, set->klen); if (err < 0) return err; @@ -132,7 +132,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx, return -EINVAL; err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG], - &priv->dreg, NULL, set->dtype, + &priv->dreg, NULL, + nft_set_datatype(set), set->dlen); if (err < 0) return err; @@ -205,8 +206,7 @@ nla_put_failure: } static int nft_lookup_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **d) + const struct nft_expr *expr) { const struct nft_lookup *priv = nft_expr_priv(expr); struct nft_set_iter iter; diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 8a14aaca93bb..868bd4d73555 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -27,8 +27,7 @@ static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = { }; static int nft_masq_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) + const struct nft_expr *expr) { int err; @@ -52,13 +51,13 @@ static int nft_masq_init(const struct nft_ctx *ctx, priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS])); if (tb[NFTA_MASQ_REG_PROTO_MIN]) { - err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MIN], + err = nft_parse_register_load(ctx, tb[NFTA_MASQ_REG_PROTO_MIN], &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_MASQ_REG_PROTO_MAX]) { - err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MAX], + err = nft_parse_register_load(ctx, tb[NFTA_MASQ_REG_PROTO_MAX], &priv->sreg_proto_max, plen); if (err < 0) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 9139ce38ea7b..8c8eb14d647b 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -581,8 +581,7 @@ static int nft_meta_get_validate_xfrm(const struct nft_ctx *ctx) } static int nft_meta_get_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) + const struct nft_expr *expr) { const struct nft_meta *priv = nft_expr_priv(expr); @@ -600,8 +599,7 @@ static int nft_meta_get_validate(const struct nft_ctx *ctx, } int nft_meta_set_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) + const struct nft_expr *expr) { struct nft_meta *priv = nft_expr_priv(expr); unsigned int hooks; @@ -657,7 +655,7 @@ int nft_meta_set_init(const struct nft_ctx *ctx, } priv->len = len; - err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len); + err = nft_parse_register_load(ctx, tb[NFTA_META_SREG], &priv->sreg, len); if (err < 0) return err; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 808f5802c270..6e21f72c5b57 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -137,8 +137,7 @@ static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { }; static int nft_nat_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) + const struct nft_expr *expr) { struct nft_nat *priv = nft_expr_priv(expr); int err; @@ -214,13 +213,13 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, priv->family = family; if (tb[NFTA_NAT_REG_ADDR_MIN]) { - err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MIN], + err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_ADDR_MIN], &priv->sreg_addr_min, alen); if (err < 0) return err; if (tb[NFTA_NAT_REG_ADDR_MAX]) { - err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MAX], + err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_ADDR_MAX], &priv->sreg_addr_max, alen); if (err < 0) @@ -234,13 +233,13 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, plen = sizeof_field(struct nf_nat_range, min_proto.all); if (tb[NFTA_NAT_REG_PROTO_MIN]) { - err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN], + err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_PROTO_MIN], &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_NAT_REG_PROTO_MAX]) { - err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MAX], + err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_PROTO_MAX], &priv->sreg_proto_max, plen); if (err < 0) diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 509011b1ef59..09da7a3f9f96 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -143,7 +143,7 @@ static int nft_objref_map_init(const struct nft_ctx *ctx, if (!(set->flags & NFT_SET_OBJECT)) return -EINVAL; - err = nft_parse_register_load(tb[NFTA_OBJREF_SET_SREG], &priv->sreg, + err = nft_parse_register_load(ctx, tb[NFTA_OBJREF_SET_SREG], &priv->sreg, set->klen); if (err < 0) return err; diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 7fec57ff736f..1c0b493ef0a9 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -108,8 +108,7 @@ nla_put_failure: } static int nft_osf_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) + const struct nft_expr *expr) { unsigned int hooks; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 50429cbd42da..330609a76fb2 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -981,7 +981,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, } priv->csum_type = csum_type; - return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg, + return nft_parse_register_load(ctx, tb[NFTA_PAYLOAD_SREG], &priv->sreg, priv->len); } diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index b2b8127c8d43..344fe311878f 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -69,8 +69,7 @@ static void nft_queue_sreg_eval(const struct nft_expr *expr, } static int nft_queue_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) + const struct nft_expr *expr) { static const unsigned int supported_hooks = ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | @@ -136,7 +135,7 @@ static int nft_queue_sreg_init(const struct nft_ctx *ctx, struct nft_queue *priv = nft_expr_priv(expr); int err; - err = nft_parse_register_load(tb[NFTA_QUEUE_SREG_QNUM], + err = nft_parse_register_load(ctx, tb[NFTA_QUEUE_SREG_QNUM], &priv->sreg_qnum, sizeof(u32)); if (err < 0) return err; diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index 51ae64cd268f..ea382f7bbd78 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -83,7 +83,7 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr goto err2; } - err = nft_parse_register_load(tb[NFTA_RANGE_SREG], &priv->sreg, + err = nft_parse_register_load(ctx, tb[NFTA_RANGE_SREG], &priv->sreg, desc_from.len); if (err < 0) goto err2; diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index a58bd8d291ff..95eedad85c83 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -27,8 +27,7 @@ static const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = { }; static int nft_redir_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) + const struct nft_expr *expr) { int err; @@ -51,13 +50,13 @@ static int nft_redir_init(const struct nft_ctx *ctx, plen = sizeof_field(struct nf_nat_range, min_proto.all); if (tb[NFTA_REDIR_REG_PROTO_MIN]) { - err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN], + err = nft_parse_register_load(ctx, tb[NFTA_REDIR_REG_PROTO_MIN], &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_REDIR_REG_PROTO_MAX]) { - err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MAX], + err = nft_parse_register_load(ctx, tb[NFTA_REDIR_REG_PROTO_MAX], &priv->sreg_proto_max, plen); if (err < 0) diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index ed2e668474d6..196a92c7ea09 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -24,8 +24,7 @@ const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = { EXPORT_SYMBOL_GPL(nft_reject_policy); int nft_reject_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) + const struct nft_expr *expr) { return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) | diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 973fa31a9dd6..49020e67304a 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -61,8 +61,7 @@ static void nft_reject_inet_eval(const struct nft_expr *expr, } static int nft_reject_inet_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) + const struct nft_expr *expr) { return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) | diff --git a/net/netfilter/nft_reject_netdev.c b/net/netfilter/nft_reject_netdev.c index 7865cd8b11bb..2558ce1505d9 100644 --- a/net/netfilter/nft_reject_netdev.c +++ b/net/netfilter/nft_reject_netdev.c @@ -145,8 +145,7 @@ out: } static int nft_reject_netdev_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) + const struct nft_expr *expr) { return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS)); } diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index 14d88394bcb7..dc50b9a5bd68 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -160,8 +160,7 @@ nla_put_failure: return -1; } -static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nft_data **data) +static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_rt *priv = nft_expr_priv(expr); unsigned int hooks; diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 15a236bebb46..eb4c4a4ac7ac 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -434,7 +434,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, res_map = scratch->map + (map_index ? m->bsize_max : 0); fill_map = scratch->map + (map_index ? 0 : m->bsize_max); - memset(res_map, 0xff, m->bsize_max * sizeof(*res_map)); + pipapo_resmap_init(m, res_map); nft_pipapo_for_each_field(f, i, m) { bool last = i == m->field_count - 1; @@ -542,7 +542,7 @@ static struct nft_pipapo_elem *pipapo_get(const struct net *net, goto out; } - memset(res_map, 0xff, m->bsize_max * sizeof(*res_map)); + pipapo_resmap_init(m, res_map); nft_pipapo_for_each_field(f, i, m) { bool last = i == m->field_count - 1; diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 0d2e40e10f7f..4a2ff85ce1c4 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -278,4 +278,25 @@ static u64 pipapo_estimate_size(const struct nft_set_desc *desc) return size; } +/** + * pipapo_resmap_init() - Initialise result map before first use + * @m: Matching data, including mapping table + * @res_map: Result map + * + * Initialize all bits covered by the first field to one, so that after + * the first step, only the matching bits of the first bit group remain. + * + * If other fields have a large bitmap, set remainder of res_map to 0. + */ +static inline void pipapo_resmap_init(const struct nft_pipapo_match *m, unsigned long *res_map) +{ + const struct nft_pipapo_field *f = m->f; + int i; + + for (i = 0; i < f->bsize; i++) + res_map[i] = ULONG_MAX; + + for (i = f->bsize; i < m->bsize_max; i++) + res_map[i] = 0ul; +} #endif /* _NFT_SET_PIPAPO_H */ diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index d08407d589ea..b8d3c3213efe 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1036,6 +1036,7 @@ nothing: /** * nft_pipapo_avx2_lookup_slow() - Fallback function for uncommon field sizes + * @mdata: Matching data, including mapping table * @map: Previous match result, used as initial bitmap * @fill: Destination bitmap to be filled with current match result * @f: Field, containing lookup and mapping tables @@ -1051,7 +1052,8 @@ nothing: * Return: -1 on no match, rule index of match if @last, otherwise first long * word index to be checked next (i.e. first filled word). */ -static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill, +static int nft_pipapo_avx2_lookup_slow(const struct nft_pipapo_match *mdata, + unsigned long *map, unsigned long *fill, const struct nft_pipapo_field *f, int offset, const u8 *pkt, bool first, bool last) @@ -1060,7 +1062,7 @@ static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill, int i, ret = -1, b; if (first) - memset(map, 0xff, bsize * sizeof(*map)); + pipapo_resmap_init(mdata, map); for (i = offset; i < bsize; i++) { if (f->bb == 8) @@ -1137,8 +1139,14 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, bool map_index; int i, ret = 0; - if (unlikely(!irq_fpu_usable())) - return nft_pipapo_lookup(net, set, key, ext); + local_bh_disable(); + + if (unlikely(!irq_fpu_usable())) { + bool fallback_res = nft_pipapo_lookup(net, set, key, ext); + + local_bh_enable(); + return fallback_res; + } m = rcu_dereference(priv->match); @@ -1153,6 +1161,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, scratch = *raw_cpu_ptr(m->scratch); if (unlikely(!scratch)) { kernel_fpu_end(); + local_bh_enable(); return false; } @@ -1186,7 +1195,7 @@ next_match: } else if (f->groups == 16) { NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16); } else { - ret = nft_pipapo_avx2_lookup_slow(res, fill, f, + ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f, ret, rp, first, last); } @@ -1202,7 +1211,7 @@ next_match: } else if (f->groups == 32) { NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32); } else { - ret = nft_pipapo_avx2_lookup_slow(res, fill, f, + ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f, ret, rp, first, last); } @@ -1233,6 +1242,7 @@ out: if (i % 2) scratch->map_index = !map_index; kernel_fpu_end(); + local_bh_enable(); return ret >= 0; } diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index f30163e2ca62..f5da0c1775f2 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -9,7 +9,8 @@ struct nft_socket { enum nft_socket_keys key:8; - u8 level; + u8 level; /* cgroupv2 level to extract */ + u8 level_user; /* cgroupv2 level provided by userspace */ u8 len; union { u8 dreg; @@ -53,6 +54,28 @@ nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo memcpy(dest, &cgid, sizeof(u64)); return true; } + +/* process context only, uses current->nsproxy. */ +static noinline int nft_socket_cgroup_subtree_level(void) +{ + struct cgroup *cgrp = cgroup_get_from_path("/"); + int level; + + if (IS_ERR(cgrp)) + return PTR_ERR(cgrp); + + level = cgrp->level; + + cgroup_put(cgrp); + + if (WARN_ON_ONCE(level > 255)) + return -ERANGE; + + if (WARN_ON_ONCE(level < 0)) + return -EINVAL; + + return level; +} #endif static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt) @@ -110,13 +133,13 @@ static void nft_socket_eval(const struct nft_expr *expr, *dest = READ_ONCE(sk->sk_mark); } else { regs->verdict.code = NFT_BREAK; - return; + goto out_put_sk; } break; case NFT_SOCKET_WILDCARD: if (!sk_fullsock(sk)) { regs->verdict.code = NFT_BREAK; - return; + goto out_put_sk; } nft_socket_wildcard(pkt, regs, sk, dest); break; @@ -124,7 +147,7 @@ static void nft_socket_eval(const struct nft_expr *expr, case NFT_SOCKET_CGROUPV2: if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) { regs->verdict.code = NFT_BREAK; - return; + goto out_put_sk; } break; #endif @@ -133,6 +156,7 @@ static void nft_socket_eval(const struct nft_expr *expr, regs->verdict.code = NFT_BREAK; } +out_put_sk: if (sk != skb->sk) sock_gen_put(sk); } @@ -173,9 +197,10 @@ static int nft_socket_init(const struct nft_ctx *ctx, case NFT_SOCKET_MARK: len = sizeof(u32); break; -#ifdef CONFIG_CGROUPS +#ifdef CONFIG_SOCK_CGROUP_DATA case NFT_SOCKET_CGROUPV2: { unsigned int level; + int err; if (!tb[NFTA_SOCKET_LEVEL]) return -EINVAL; @@ -184,6 +209,17 @@ static int nft_socket_init(const struct nft_ctx *ctx, if (level > 255) return -EOPNOTSUPP; + err = nft_socket_cgroup_subtree_level(); + if (err < 0) + return err; + + priv->level_user = level; + + level += err; + /* Implies a giant cgroup tree */ + if (WARN_ON_ONCE(level > 255)) + return -EOPNOTSUPP; + priv->level = level; len = sizeof(u64); break; @@ -208,7 +244,7 @@ static int nft_socket_dump(struct sk_buff *skb, if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg)) return -1; if (priv->key == NFT_SOCKET_CGROUPV2 && - nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level))) + nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level_user))) return -1; return 0; } @@ -239,8 +275,7 @@ static bool nft_socket_reduce(struct nft_regs_track *track, } static int nft_socket_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) + const struct nft_expr *expr) { if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index 1d737f89dfc1..5d3e51825985 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -248,8 +248,7 @@ static void nft_synproxy_eval(const struct nft_expr *expr, } static int nft_synproxy_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) + const struct nft_expr *expr) { if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index 71412adb73d4..50481280abd2 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -254,14 +254,14 @@ static int nft_tproxy_init(const struct nft_ctx *ctx, } if (tb[NFTA_TPROXY_REG_ADDR]) { - err = nft_parse_register_load(tb[NFTA_TPROXY_REG_ADDR], + err = nft_parse_register_load(ctx, tb[NFTA_TPROXY_REG_ADDR], &priv->sreg_addr, alen); if (err < 0) return err; } if (tb[NFTA_TPROXY_REG_PORT]) { - err = nft_parse_register_load(tb[NFTA_TPROXY_REG_PORT], + err = nft_parse_register_load(ctx, tb[NFTA_TPROXY_REG_PORT], &priv->sreg_port, sizeof(u16)); if (err < 0) return err; @@ -313,8 +313,7 @@ static int nft_tproxy_dump(struct sk_buff *skb, } static int nft_tproxy_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) + const struct nft_expr *expr) { if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 1c866757db55..8a07b46cc8fb 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -229,8 +229,7 @@ static int nft_xfrm_get_dump(struct sk_buff *skb, return 0; } -static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nft_data **data) +static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_xfrm *priv = nft_expr_priv(expr); unsigned int hooks; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 5d04ef80a61d..0e762277bcf8 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -86,6 +86,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) { struct xt_connlimit_info *info = par->matchinfo; unsigned int keylen; + int ret; keylen = sizeof(u32); if (par->family == NFPROTO_IPV6) @@ -93,8 +94,17 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) else keylen += sizeof(struct in_addr); + ret = nf_ct_netns_get(par->net, par->family); + if (ret < 0) { + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); + return ret; + } + /* init private data */ - info->data = nf_conncount_init(par->net, par->family, keylen); + info->data = nf_conncount_init(par->net, keylen); + if (IS_ERR(info->data)) + nf_ct_netns_put(par->net, par->family); return PTR_ERR_OR_ZERO(info->data); } @@ -103,7 +113,8 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_connlimit_info *info = par->matchinfo; - nf_conncount_destroy(par->net, par->family, info->data); + nf_conncount_destroy(par->net, info->data); + nf_ct_netns_put(par->net, par->family); } static struct xt_match connlimit_mt_reg __read_mostly = { diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index ef93e0d3bee0..588a5e6ad899 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -59,9 +59,9 @@ MODULE_PARM_DESC(ip_list_gid, "default owning group of /proc/net/xt_recent/* fil /* retained for backwards compatibility */ static unsigned int ip_pkt_list_tot __read_mostly; module_param(ip_pkt_list_tot, uint, 0400); -MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)"); +MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 65535)"); -#define XT_RECENT_MAX_NSTAMPS 256 +#define XT_RECENT_MAX_NSTAMPS 65536 struct recent_entry { struct list_head list; @@ -69,7 +69,7 @@ struct recent_entry { union nf_inet_addr addr; u_int16_t family; u_int8_t ttl; - u_int8_t index; + u_int16_t index; u_int16_t nstamps; unsigned long stamps[]; }; @@ -80,7 +80,7 @@ struct recent_table { union nf_inet_addr mask; unsigned int refcnt; unsigned int entries; - u8 nstamps_max_mask; + u_int16_t nstamps_max_mask; struct list_head lru_list; struct list_head iphash[]; }; |