diff options
Diffstat (limited to 'net/netfilter/nft_set_pipapo.c')
-rw-r--r-- | net/netfilter/nft_set_pipapo.c | 258 |
1 files changed, 123 insertions, 135 deletions
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 187138afac45..15a236bebb46 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -504,6 +504,7 @@ out: * pipapo_get() - Get matching element reference given key data * @net: Network namespace * @set: nftables API set representation + * @m: storage containing active/existing elements * @data: Key data to be matched against existing elements * @genmask: If set, check that element is active in given genmask * @tstamp: timestamp to check for expired elements @@ -517,17 +518,15 @@ out: */ static struct nft_pipapo_elem *pipapo_get(const struct net *net, const struct nft_set *set, + const struct nft_pipapo_match *m, const u8 *data, u8 genmask, u64 tstamp, gfp_t gfp) { struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT); - struct nft_pipapo *priv = nft_set_priv(set); unsigned long *res_map, *fill_map = NULL; - const struct nft_pipapo_match *m; const struct nft_pipapo_field *f; int i; - m = priv->clone; if (m->bsize_max == 0) return ret; @@ -612,9 +611,11 @@ static struct nft_elem_priv * nft_pipapo_get(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags) { + struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_match *m = rcu_dereference(priv->match); struct nft_pipapo_elem *e; - e = pipapo_get(net, set, (const u8 *)elem->key.val.data, + e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data, nft_genmask_cur(net), get_jiffies_64(), GFP_ATOMIC); if (IS_ERR(e)) @@ -1247,6 +1248,40 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, return 0; } +static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set) +{ +#ifdef CONFIG_PROVE_LOCKING + const struct net *net = read_pnet(&set->net); + + return lockdep_is_held(&nft_pernet(net)->commit_mutex); +#else + return true; +#endif +} + +static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old); + +/** + * pipapo_maybe_clone() - Build clone for pending data changes, if not existing + * @set: nftables API set representation + * + * Return: newly created or existing clone, if any. NULL on allocation failure + */ +static struct nft_pipapo_match *pipapo_maybe_clone(const struct nft_set *set) +{ + struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_match *m; + + if (priv->clone) + return priv->clone; + + m = rcu_dereference_protected(priv->match, + nft_pipapo_transaction_mutex_held(set)); + priv->clone = pipapo_clone(m); + + return priv->clone; +} + /** * nft_pipapo_insert() - Validate and insert ranged elements * @net: Network namespace @@ -1263,8 +1298,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; const u8 *start = (const u8 *)elem->key.val.data, *end; - struct nft_pipapo *priv = nft_set_priv(set); - struct nft_pipapo_match *m = priv->clone; + struct nft_pipapo_match *m = pipapo_maybe_clone(set); u8 genmask = nft_genmask_next(net); struct nft_pipapo_elem *e, *dup; u64 tstamp = nft_net_tstamp(net); @@ -1272,12 +1306,15 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, const u8 *start_p, *end_p; int i, bsize_max, err = 0; + if (!m) + return -ENOMEM; + if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) end = (const u8 *)nft_set_ext_key_end(ext)->data; else end = start; - dup = pipapo_get(net, set, start, genmask, tstamp, GFP_KERNEL); + dup = pipapo_get(net, set, m, start, genmask, tstamp, GFP_KERNEL); if (!IS_ERR(dup)) { /* Check if we already have the same exact entry */ const struct nft_data *dup_key, *dup_end; @@ -1299,7 +1336,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, if (PTR_ERR(dup) == -ENOENT) { /* Look for partially overlapping entries */ - dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp, + dup = pipapo_get(net, set, m, end, nft_genmask_next(net), tstamp, GFP_KERNEL); } @@ -1332,8 +1369,6 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, } /* Insert */ - priv->dirty = true; - bsize_max = m->bsize_max; nft_pipapo_for_each_field(f, i, m) { @@ -1384,7 +1419,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, * pipapo_clone() - Clone matching data to create new working copy * @old: Existing matching data * - * Return: copy of matching data passed as 'old', error pointer on failure + * Return: copy of matching data passed as 'old' or NULL. */ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) { @@ -1394,7 +1429,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) new = kmalloc(struct_size(new, f, old->field_count), GFP_KERNEL); if (!new) - return ERR_PTR(-ENOMEM); + return NULL; new->field_count = old->field_count; new->bsize_max = old->bsize_max; @@ -1466,7 +1501,7 @@ out_scratch: free_percpu(new->scratch); kfree(new); - return ERR_PTR(-ENOMEM); + return NULL; } /** @@ -1698,8 +1733,6 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) * NFT_SET_ELEM_DEAD_BIT. */ if (__nft_set_elem_expired(&e->ext, tstamp)) { - priv->dirty = true; - gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); if (!gc) return; @@ -1777,57 +1810,30 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) static void nft_pipapo_commit(struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); - struct nft_pipapo_match *new_clone, *old; - - if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set))) - pipapo_gc(set, priv->clone); + struct nft_pipapo_match *old; - if (!priv->dirty) + if (!priv->clone) return; - new_clone = pipapo_clone(priv->clone); - if (IS_ERR(new_clone)) - return; + if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set))) + pipapo_gc(set, priv->clone); - priv->dirty = false; + old = rcu_replace_pointer(priv->match, priv->clone, + nft_pipapo_transaction_mutex_held(set)); + priv->clone = NULL; - old = rcu_access_pointer(priv->match); - rcu_assign_pointer(priv->match, priv->clone); if (old) call_rcu(&old->rcu, pipapo_reclaim_match); - - priv->clone = new_clone; -} - -static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set) -{ -#ifdef CONFIG_PROVE_LOCKING - const struct net *net = read_pnet(&set->net); - - return lockdep_is_held(&nft_pernet(net)->commit_mutex); -#else - return true; -#endif } static void nft_pipapo_abort(const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); - struct nft_pipapo_match *new_clone, *m; - - if (!priv->dirty) - return; - - m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set)); - new_clone = pipapo_clone(m); - if (IS_ERR(new_clone)) + if (!priv->clone) return; - - priv->dirty = false; - pipapo_free_match(priv->clone); - priv->clone = new_clone; + priv->clone = NULL; } /** @@ -1851,52 +1857,38 @@ static void nft_pipapo_activate(const struct net *net, } /** - * pipapo_deactivate() - Check that element is in set, mark as inactive + * nft_pipapo_deactivate() - Search for element and make it inactive * @net: Network namespace * @set: nftables API set representation - * @data: Input key data - * @ext: nftables API extension pointer, used to check for end element - * - * This is a convenience function that can be called from both - * nft_pipapo_deactivate() and nft_pipapo_flush(), as they are in fact the same - * operation. + * @elem: nftables API element representation containing key data * * Return: deactivated element if found, NULL otherwise. */ -static void *pipapo_deactivate(const struct net *net, const struct nft_set *set, - const u8 *data, const struct nft_set_ext *ext) +static struct nft_elem_priv * +nft_pipapo_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { + struct nft_pipapo_match *m = pipapo_maybe_clone(set); struct nft_pipapo_elem *e; - e = pipapo_get(net, set, data, nft_genmask_next(net), - nft_net_tstamp(net), GFP_KERNEL); + /* removal must occur on priv->clone, if we are low on memory + * we have no choice and must fail the removal request. + */ + if (!m) + return NULL; + + e = pipapo_get(net, set, m, (const u8 *)elem->key.val.data, + nft_genmask_next(net), nft_net_tstamp(net), GFP_KERNEL); if (IS_ERR(e)) return NULL; nft_set_elem_change_active(net, set, &e->ext); - return e; -} - -/** - * nft_pipapo_deactivate() - Call pipapo_deactivate() to make element inactive - * @net: Network namespace - * @set: nftables API set representation - * @elem: nftables API element representation containing key data - * - * Return: deactivated element if found, NULL otherwise. - */ -static struct nft_elem_priv * -nft_pipapo_deactivate(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) -{ - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); - - return pipapo_deactivate(net, set, (const u8 *)elem->key.val.data, ext); + return &e->priv; } /** - * nft_pipapo_flush() - Call pipapo_deactivate() to make element inactive + * nft_pipapo_flush() - make element inactive * @net: Network namespace * @set: nftables API set representation * @elem_priv: nftables API element representation containing key data @@ -2093,7 +2085,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); if (last && f->mt[rulemap[i].to].e == e) { - priv->dirty = true; pipapo_drop(m, rulemap); return; } @@ -2106,35 +2097,23 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, } /** - * nft_pipapo_walk() - Walk over elements + * nft_pipapo_do_walk() - Walk over elements in m * @ctx: nftables API context * @set: nftables API set representation + * @m: matching data pointing to key mapping array * @iter: Iterator * * As elements are referenced in the mapping array for the last field, directly * scan that array: there's no need to follow rule mappings from the first - * field. + * field. @m is protected either by RCU read lock or by transaction mutex. */ -static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_iter *iter) +static void nft_pipapo_do_walk(const struct nft_ctx *ctx, struct nft_set *set, + const struct nft_pipapo_match *m, + struct nft_set_iter *iter) { - struct nft_pipapo *priv = nft_set_priv(set); - const struct nft_pipapo_match *m; const struct nft_pipapo_field *f; unsigned int i, r; - WARN_ON_ONCE(iter->type != NFT_ITER_READ && - iter->type != NFT_ITER_UPDATE); - - rcu_read_lock(); - if (iter->type == NFT_ITER_READ) - m = rcu_dereference(priv->match); - else - m = priv->clone; - - if (unlikely(!m)) - goto out; - for (i = 0, f = m->f; i < m->field_count - 1; i++, f++) ; @@ -2151,14 +2130,49 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, iter->err = iter->fn(ctx, set, iter, &e->priv); if (iter->err < 0) - goto out; + return; cont: iter->count++; } +} -out: - rcu_read_unlock(); +/** + * nft_pipapo_walk() - Walk over elements + * @ctx: nftables API context + * @set: nftables API set representation + * @iter: Iterator + * + * Test if destructive action is needed or not, clone active backend if needed + * and call the real function to work on the data. + */ +static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_iter *iter) +{ + struct nft_pipapo *priv = nft_set_priv(set); + const struct nft_pipapo_match *m; + + switch (iter->type) { + case NFT_ITER_UPDATE: + m = pipapo_maybe_clone(set); + if (!m) { + iter->err = -ENOMEM; + return; + } + + nft_pipapo_do_walk(ctx, set, m, iter); + break; + case NFT_ITER_READ: + rcu_read_lock(); + m = rcu_dereference(priv->match); + nft_pipapo_do_walk(ctx, set, m, iter); + rcu_read_unlock(); + break; + default: + iter->err = -EINVAL; + WARN_ON_ONCE(1); + break; + } } /** @@ -2267,21 +2281,10 @@ static int nft_pipapo_init(const struct nft_set *set, f->mt = NULL; } - /* Create an initial clone of matching data for next insertion */ - priv->clone = pipapo_clone(m); - if (IS_ERR(priv->clone)) { - err = PTR_ERR(priv->clone); - goto out_free; - } - - priv->dirty = false; - rcu_assign_pointer(priv->match, m); return 0; -out_free: - free_percpu(m->scratch); out_scratch: kfree(m); @@ -2326,33 +2329,18 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m; - int cpu; m = rcu_dereference_protected(priv->match, true); - if (m) { - rcu_barrier(); - - for_each_possible_cpu(cpu) - pipapo_free_scratch(m, cpu); - free_percpu(m->scratch); - pipapo_free_fields(m); - kfree(m); - priv->match = NULL; - } if (priv->clone) { - m = priv->clone; - - nft_set_pipapo_match_destroy(ctx, set, m); - - for_each_possible_cpu(cpu) - pipapo_free_scratch(priv->clone, cpu); - free_percpu(priv->clone->scratch); - - pipapo_free_fields(priv->clone); - kfree(priv->clone); + nft_set_pipapo_match_destroy(ctx, set, priv->clone); + pipapo_free_match(priv->clone); priv->clone = NULL; + } else { + nft_set_pipapo_match_destroy(ctx, set, m); } + + pipapo_free_match(m); } /** |