diff options
Diffstat (limited to 'net/netfilter/nf_flow_table_offload.c')
-rw-r--r-- | net/netfilter/nf_flow_table_offload.c | 331 |
1 files changed, 249 insertions, 82 deletions
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 06f00cdc3891..e3b099c14eff 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -7,13 +7,13 @@ #include <linux/tc_act/tc_csum.h> #include <net/flow_offload.h> #include <net/netfilter/nf_flow_table.h> +#include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_tuple.h> -static struct work_struct nf_flow_offload_work; -static DEFINE_SPINLOCK(flow_offload_pending_list_lock); -static LIST_HEAD(flow_offload_pending_list); +static struct workqueue_struct *nf_flow_offload_wq; struct flow_offload_work { struct list_head list; @@ -21,40 +21,68 @@ struct flow_offload_work { int priority; struct nf_flowtable *flowtable; struct flow_offload *flow; -}; - -struct nf_flow_key { - struct flow_dissector_key_meta meta; - struct flow_dissector_key_control control; - struct flow_dissector_key_basic basic; - union { - struct flow_dissector_key_ipv4_addrs ipv4; - struct flow_dissector_key_ipv6_addrs ipv6; - }; - struct flow_dissector_key_tcp tcp; - struct flow_dissector_key_ports tp; -} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ - -struct nf_flow_match { - struct flow_dissector dissector; - struct nf_flow_key key; - struct nf_flow_key mask; -}; - -struct nf_flow_rule { - struct nf_flow_match match; - struct flow_rule *rule; + struct work_struct work; }; #define NF_FLOW_DISSECTOR(__match, __type, __field) \ (__match)->dissector.offset[__type] = \ offsetof(struct nf_flow_key, __field) +static void nf_flow_rule_lwt_match(struct nf_flow_match *match, + struct ip_tunnel_info *tun_info) +{ + struct nf_flow_key *mask = &match->mask; + struct nf_flow_key *key = &match->key; + unsigned int enc_keys; + + if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX)) + return; + + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control); + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id); + key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id); + mask->enc_key_id.keyid = 0xffffffff; + enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | + BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL); + + if (ip_tunnel_info_af(tun_info) == AF_INET) { + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + enc_ipv4); + key->enc_ipv4.src = tun_info->key.u.ipv4.dst; + key->enc_ipv4.dst = tun_info->key.u.ipv4.src; + if (key->enc_ipv4.src) + mask->enc_ipv4.src = 0xffffffff; + if (key->enc_ipv4.dst) + mask->enc_ipv4.dst = 0xffffffff; + enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS); + key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + } else { + memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst, + sizeof(struct in6_addr)); + memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src, + sizeof(struct in6_addr)); + if (memcmp(&key->enc_ipv6.src, &in6addr_any, + sizeof(struct in6_addr))) + memset(&key->enc_ipv6.src, 0xff, + sizeof(struct in6_addr)); + if (memcmp(&key->enc_ipv6.dst, &in6addr_any, + sizeof(struct in6_addr))) + memset(&key->enc_ipv6.dst, 0xff, + sizeof(struct in6_addr)); + enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS); + key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + } + + match->dissector.used_keys |= enc_keys; +} + static int nf_flow_rule_match(struct nf_flow_match *match, - const struct flow_offload_tuple *tuple) + const struct flow_offload_tuple *tuple, + struct dst_entry *other_dst) { struct nf_flow_key *mask = &match->mask; struct nf_flow_key *key = &match->key; + struct ip_tunnel_info *tun_info; NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control); @@ -64,6 +92,11 @@ static int nf_flow_rule_match(struct nf_flow_match *match, NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp); + if (other_dst && other_dst->lwtstate) { + tun_info = lwt_tun_info(other_dst->lwtstate); + nf_flow_rule_lwt_match(match, tun_info); + } + key->meta.ingress_ifindex = tuple->iifidx; mask->meta.ingress_ifindex = 0xffffffff; @@ -87,6 +120,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match, default: return -EOPNOTSUPP; } + mask->control.addr_type = 0xffff; match->dissector.used_keys |= BIT(key->control.addr_type); mask->basic.n_proto = 0xffff; @@ -442,10 +476,52 @@ static void flow_offload_redirect(const struct flow_offload *flow, dev_hold(rt->dst.dev); } +static void flow_offload_encap_tunnel(const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) +{ + struct flow_action_entry *entry; + struct dst_entry *dst; + + dst = flow->tuplehash[dir].tuple.dst_cache; + if (dst && dst->lwtstate) { + struct ip_tunnel_info *tun_info; + + tun_info = lwt_tun_info(dst->lwtstate); + if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { + entry = flow_action_entry_next(flow_rule); + entry->id = FLOW_ACTION_TUNNEL_ENCAP; + entry->tunnel = tun_info; + } + } +} + +static void flow_offload_decap_tunnel(const struct flow_offload *flow, + enum flow_offload_tuple_dir dir, + struct nf_flow_rule *flow_rule) +{ + struct flow_action_entry *entry; + struct dst_entry *dst; + + dst = flow->tuplehash[!dir].tuple.dst_cache; + if (dst && dst->lwtstate) { + struct ip_tunnel_info *tun_info; + + tun_info = lwt_tun_info(dst->lwtstate); + if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) { + entry = flow_action_entry_next(flow_rule); + entry->id = FLOW_ACTION_TUNNEL_DECAP; + } + } +} + int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { + flow_offload_decap_tunnel(flow, dir, flow_rule); + flow_offload_encap_tunnel(flow, dir, flow_rule); + if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 || flow_offload_eth_dst(net, flow, dir, flow_rule) < 0) return -1; @@ -472,6 +548,9 @@ int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow, enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule) { + flow_offload_decap_tunnel(flow, dir, flow_rule); + flow_offload_encap_tunnel(flow, dir, flow_rule); + if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 || flow_offload_eth_dst(net, flow, dir, flow_rule) < 0) return -1; @@ -502,6 +581,7 @@ nf_flow_offload_rule_alloc(struct net *net, const struct flow_offload *flow = offload->flow; const struct flow_offload_tuple *tuple; struct nf_flow_rule *flow_rule; + struct dst_entry *other_dst; int err = -ENOMEM; flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL); @@ -517,7 +597,8 @@ nf_flow_offload_rule_alloc(struct net *net, flow_rule->rule->match.key = &flow_rule->match.key; tuple = &flow->tuplehash[dir].tuple; - err = nf_flow_rule_match(&flow_rule->match, tuple); + other_dst = flow->tuplehash[!dir].tuple.dst_cache; + err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst); if (err < 0) goto err_flow_match; @@ -597,6 +678,7 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, struct nf_flow_rule *flow_rule, enum flow_offload_tuple_dir dir, int priority, int cmd, + struct flow_stats *stats, struct list_head *block_cb_list) { struct flow_cls_offload cls_flow = {}; @@ -610,6 +692,7 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, if (cmd == FLOW_CLS_REPLACE) cls_flow.rule = flow_rule->rule; + down_read(&flowtable->flow_block_lock); list_for_each_entry(block_cb, block_cb_list, list) { err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv); @@ -618,6 +701,10 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, i++; } + up_read(&flowtable->flow_block_lock); + + if (cmd == FLOW_CLS_STATS) + memcpy(stats, &cls_flow.stats, sizeof(*stats)); return i; } @@ -628,7 +715,7 @@ static int flow_offload_tuple_add(struct flow_offload_work *offload, { return nf_flow_offload_tuple(offload->flowtable, offload->flow, flow_rule, dir, offload->priority, - FLOW_CLS_REPLACE, + FLOW_CLS_REPLACE, NULL, &offload->flowtable->flow_block.cb_list); } @@ -636,7 +723,7 @@ static void flow_offload_tuple_del(struct flow_offload_work *offload, enum flow_offload_tuple_dir dir) { nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir, - offload->priority, FLOW_CLS_DESTROY, + offload->priority, FLOW_CLS_DESTROY, NULL, &offload->flowtable->flow_block.cb_list); } @@ -682,19 +769,9 @@ static void flow_offload_tuple_stats(struct flow_offload_work *offload, enum flow_offload_tuple_dir dir, struct flow_stats *stats) { - struct nf_flowtable *flowtable = offload->flowtable; - struct flow_cls_offload cls_flow = {}; - struct flow_block_cb *block_cb; - struct netlink_ext_ack extack; - __be16 proto = ETH_P_ALL; - - nf_flow_offload_init(&cls_flow, proto, offload->priority, - FLOW_CLS_STATS, - &offload->flow->tuplehash[dir].tuple, &extack); - - list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list) - block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv); - memcpy(stats, &cls_flow.stats, sizeof(*stats)); + nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir, + offload->priority, FLOW_CLS_STATS, stats, + &offload->flowtable->flow_block.cb_list); } static void flow_offload_work_stats(struct flow_offload_work *offload) @@ -708,19 +785,25 @@ static void flow_offload_work_stats(struct flow_offload_work *offload) lastused = max_t(u64, stats[0].lastused, stats[1].lastused); offload->flow->timeout = max_t(u64, offload->flow->timeout, lastused + NF_FLOW_TIMEOUT); + + if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) { + if (stats[0].pkts) + nf_ct_acct_add(offload->flow->ct, + FLOW_OFFLOAD_DIR_ORIGINAL, + stats[0].pkts, stats[0].bytes); + if (stats[1].pkts) + nf_ct_acct_add(offload->flow->ct, + FLOW_OFFLOAD_DIR_REPLY, + stats[1].pkts, stats[1].bytes); + } } static void flow_offload_work_handler(struct work_struct *work) { - struct flow_offload_work *offload, *next; - LIST_HEAD(offload_pending_list); - - spin_lock_bh(&flow_offload_pending_list_lock); - list_replace_init(&flow_offload_pending_list, &offload_pending_list); - spin_unlock_bh(&flow_offload_pending_list_lock); + struct flow_offload_work *offload; - list_for_each_entry_safe(offload, next, &offload_pending_list, list) { - switch (offload->cmd) { + offload = container_of(work, struct flow_offload_work, work); + switch (offload->cmd) { case FLOW_CLS_REPLACE: flow_offload_work_add(offload); break; @@ -732,19 +815,14 @@ static void flow_offload_work_handler(struct work_struct *work) break; default: WARN_ON_ONCE(1); - } - list_del(&offload->list); - kfree(offload); } + + kfree(offload); } static void flow_offload_queue_work(struct flow_offload_work *offload) { - spin_lock_bh(&flow_offload_pending_list_lock); - list_add_tail(&offload->list, &flow_offload_pending_list); - spin_unlock_bh(&flow_offload_pending_list_lock); - - schedule_work(&nf_flow_offload_work); + queue_work(nf_flow_offload_wq, &offload->work); } static struct flow_offload_work * @@ -761,6 +839,7 @@ nf_flow_offload_work_alloc(struct nf_flowtable *flowtable, offload->flow = flow; offload->priority = flowtable->priority; offload->flowtable = flowtable; + INIT_WORK(&offload->work, flow_offload_work_handler); return offload; } @@ -811,7 +890,7 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable, void nf_flow_table_offload_flush(struct nf_flowtable *flowtable) { if (nf_flowtable_hw_offload(flowtable)) - flush_work(&nf_flow_offload_work); + flush_workqueue(nf_flow_offload_wq); } static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, @@ -839,25 +918,47 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, return err; } -static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, - struct nf_flowtable *flowtable, - struct net_device *dev, - enum flow_block_command cmd, - struct netlink_ext_ack *extack) +static void nf_flow_table_block_offload_init(struct flow_block_offload *bo, + struct net *net, + enum flow_block_command cmd, + struct nf_flowtable *flowtable, + struct netlink_ext_ack *extack) { - int err; - - if (!dev->netdev_ops->ndo_setup_tc) - return -EOPNOTSUPP; - memset(bo, 0, sizeof(*bo)); - bo->net = dev_net(dev); + bo->net = net; bo->block = &flowtable->flow_block; bo->command = cmd; bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; bo->extack = extack; INIT_LIST_HEAD(&bo->cb_list); +} + +static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo, + struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd, + struct netlink_ext_ack *extack) +{ + nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, + extack); + flow_indr_block_call(dev, bo, cmd, TC_SETUP_FT); + + if (list_empty(&bo->cb_list)) + return -EOPNOTSUPP; + return 0; +} + +static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, + struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd, + struct netlink_ext_ack *extack) +{ + int err; + + nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, + extack); err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo); if (err < 0) return err; @@ -876,7 +977,12 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, if (!nf_flowtable_hw_offload(flowtable)) return 0; - err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, &extack); + if (dev->netdev_ops->ndo_setup_tc) + err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, + &extack); + else + err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd, + &extack); if (err < 0) return err; @@ -884,22 +990,83 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, } EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup); -int nf_flow_table_offload_init(void) +static void nf_flow_table_indr_block_ing_cmd(struct net_device *dev, + struct nf_flowtable *flowtable, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command cmd) { - INIT_WORK(&nf_flow_offload_work, flow_offload_work_handler); + struct netlink_ext_ack extack = {}; + struct flow_block_offload bo; - return 0; + if (!flowtable) + return; + + nf_flow_table_block_offload_init(&bo, dev_net(dev), cmd, flowtable, + &extack); + + cb(dev, cb_priv, TC_SETUP_FT, &bo); + + nf_flow_table_block_setup(flowtable, &bo, cmd); } -void nf_flow_table_offload_exit(void) +static void nf_flow_table_indr_block_cb_cmd(struct nf_flowtable *flowtable, + struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command cmd) { - struct flow_offload_work *offload, *next; - LIST_HEAD(offload_pending_list); + if (!(flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD)) + return; - cancel_work_sync(&nf_flow_offload_work); + nf_flow_table_indr_block_ing_cmd(dev, flowtable, cb, cb_priv, cmd); +} - list_for_each_entry_safe(offload, next, &offload_pending_list, list) { - list_del(&offload->list); - kfree(offload); +static void nf_flow_table_indr_block_cb(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command cmd) +{ + struct net *net = dev_net(dev); + struct nft_flowtable *nft_ft; + struct nft_table *table; + struct nft_hook *hook; + + mutex_lock(&net->nft.commit_mutex); + list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(nft_ft, &table->flowtables, list) { + list_for_each_entry(hook, &nft_ft->hook_list, list) { + if (hook->ops.dev != dev) + continue; + + nf_flow_table_indr_block_cb_cmd(&nft_ft->data, + dev, cb, + cb_priv, cmd); + } + } } + mutex_unlock(&net->nft.commit_mutex); +} + +static struct flow_indr_block_entry block_ing_entry = { + .cb = nf_flow_table_indr_block_cb, + .list = LIST_HEAD_INIT(block_ing_entry.list), +}; + +int nf_flow_table_offload_init(void) +{ + nf_flow_offload_wq = alloc_workqueue("nf_flow_table_offload", + WQ_UNBOUND | WQ_MEM_RECLAIM, 0); + if (!nf_flow_offload_wq) + return -ENOMEM; + + flow_indr_add_block_cb(&block_ing_entry); + + return 0; +} + +void nf_flow_table_offload_exit(void) +{ + flow_indr_del_block_cb(&block_ing_entry); + destroy_workqueue(nf_flow_offload_wq); } |