summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-04-20 01:49:50 +0300
committerDavid S. Miller <davem@davemloft.net>2021-04-20 01:49:50 +0300
commit6dd06ec7c13bc970edb0729221a47bf6368e2b9e (patch)
tree6cb002a23ad5026251efb4b4d9f2c80fb6fb1b93
parentc6400e3fc3fa821a26a58cf867331e0877a4c56b (diff)
parentb72920f6e4a9d6607b723d69b7f412c829769c75 (diff)
downloadlinux-6dd06ec7c13bc970edb0729221a47bf6368e2b9e.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter updates for net-next The following patchset contains Netfilter updates for net-next: 1) Add vlan match and pop actions to the flowtable offload, patches from wenxu. 2) Reduce size of the netns_ct structure, which itself is embedded in struct net Make netns_ct a read-mostly structure. Patches from Florian Westphal. 3) Add FLOW_OFFLOAD_XMIT_UNSPEC to skip dst check from garbage collector path, as required by the tc CT action. From Roi Dayan. 4) VLAN offload fixes for nftables: Allow for matching on both s-vlan and c-vlan selectors. Fix match of VLAN id due to incorrect byteorder. Add a new routine to properly populate flow dissector ethertypes. 5) Missing keys in ip{6}_route_me_harder() results in incorrect routes. This includes an update for selftest infra. Patches from Ido Schimmel. 6) Add counter hardware offload support through FLOW_CLS_STATS. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/netfilter/nf_conntrack.h8
-rw-r--r--include/net/netfilter/nf_flow_table.h5
-rw-r--r--include/net/netfilter/nf_tables.h2
-rw-r--r--include/net/netfilter/nf_tables_offload.h13
-rw-r--r--include/net/netns/conntrack.h23
-rw-r--r--net/ipv4/netfilter.c2
-rw-r--r--net/ipv6/netfilter.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c46
-rw-r--r--net/netfilter/nf_conntrack_expect.c22
-rw-r--r--net/netfilter/nf_conntrack_helper.c15
-rw-r--r--net/netfilter/nf_conntrack_netlink.c5
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c34
-rw-r--r--net/netfilter/nf_conntrack_standalone.c66
-rw-r--r--net/netfilter/nf_flow_table_core.c3
-rw-r--r--net/netfilter/nf_flow_table_offload.c52
-rw-r--r--net/netfilter/nf_tables_api.c3
-rw-r--r--net/netfilter/nf_tables_offload.c88
-rw-r--r--net/netfilter/nft_cmp.c41
-rw-r--r--net/netfilter/nft_counter.c29
-rw-r--r--net/netfilter/nft_payload.c13
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh152
21 files changed, 520 insertions, 104 deletions
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 86d86c860ede..06dc6db70d18 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -44,6 +44,13 @@ union nf_conntrack_expect_proto {
};
struct nf_conntrack_net {
+ /* only used when new connection is allocated: */
+ atomic_t count;
+ unsigned int expect_count;
+ u8 sysctl_auto_assign_helper;
+ bool auto_assign_helper_warned;
+
+ /* only used from work queues, configuration plane, and so on: */
unsigned int users4;
unsigned int users6;
unsigned int users_bridge;
@@ -331,6 +338,7 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
void nf_ct_tmpl_free(struct nf_conn *tmpl);
u32 nf_ct_get_id(const struct nf_conn *ct);
+u32 nf_conntrack_count(const struct net *net);
static inline void
nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info)
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 583b327d8fc0..51d8eb99764d 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -21,6 +21,8 @@ struct nf_flow_key {
struct flow_dissector_key_control control;
struct flow_dissector_key_control enc_control;
struct flow_dissector_key_basic basic;
+ struct flow_dissector_key_vlan vlan;
+ struct flow_dissector_key_vlan cvlan;
union {
struct flow_dissector_key_ipv4_addrs ipv4;
struct flow_dissector_key_ipv6_addrs ipv6;
@@ -90,7 +92,8 @@ enum flow_offload_tuple_dir {
#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
enum flow_offload_xmit_type {
- FLOW_OFFLOAD_XMIT_NEIGH = 0,
+ FLOW_OFFLOAD_XMIT_UNSPEC = 0,
+ FLOW_OFFLOAD_XMIT_NEIGH,
FLOW_OFFLOAD_XMIT_XFRM,
FLOW_OFFLOAD_XMIT_DIRECT,
};
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index f0f7a3c5da6a..4a75da2a2e1d 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -867,6 +867,8 @@ struct nft_expr_ops {
int (*offload)(struct nft_offload_ctx *ctx,
struct nft_flow_rule *flow,
const struct nft_expr *expr);
+ void (*offload_stats)(struct nft_expr *expr,
+ const struct flow_stats *stats);
u32 offload_flags;
const struct nft_expr_type *type;
void *data;
diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index 1d34fe154fe0..f9d95ff82df8 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -4,11 +4,16 @@
#include <net/flow_offload.h>
#include <net/netfilter/nf_tables.h>
+enum nft_offload_reg_flags {
+ NFT_OFFLOAD_F_NETWORK2HOST = (1 << 0),
+};
+
struct nft_offload_reg {
u32 key;
u32 len;
u32 base_offset;
u32 offset;
+ u32 flags;
struct nft_data data;
struct nft_data mask;
};
@@ -45,6 +50,7 @@ struct nft_flow_key {
struct flow_dissector_key_ports tp;
struct flow_dissector_key_ip ip;
struct flow_dissector_key_vlan vlan;
+ struct flow_dissector_key_vlan cvlan;
struct flow_dissector_key_eth_addrs eth_addrs;
struct flow_dissector_key_meta meta;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
@@ -68,16 +74,21 @@ void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
struct nft_rule;
struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule);
+int nft_flow_rule_stats(const struct nft_chain *chain, const struct nft_rule *rule);
void nft_flow_rule_destroy(struct nft_flow_rule *flow);
int nft_flow_rule_offload_commit(struct net *net);
-#define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \
+#define NFT_OFFLOAD_MATCH_FLAGS(__key, __base, __field, __len, __reg, __flags) \
(__reg)->base_offset = \
offsetof(struct nft_flow_key, __base); \
(__reg)->offset = \
offsetof(struct nft_flow_key, __base.__field); \
(__reg)->len = __len; \
(__reg)->key = __key; \
+ (__reg)->flags = __flags;
+
+#define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \
+ NFT_OFFLOAD_MATCH_FLAGS(__key, __base, __field, __len, __reg, 0)
#define NFT_OFFLOAD_MATCH_EXACT(__key, __base, __field, __len, __reg) \
NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index e5f664d69ead..ad0a95c2335e 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -24,9 +24,9 @@ struct nf_generic_net {
struct nf_tcp_net {
unsigned int timeouts[TCP_CONNTRACK_TIMEOUT_MAX];
- int tcp_loose;
- int tcp_be_liberal;
- int tcp_max_retrans;
+ u8 tcp_loose;
+ u8 tcp_be_liberal;
+ u8 tcp_max_retrans;
};
enum udp_conntrack {
@@ -45,7 +45,7 @@ struct nf_icmp_net {
#ifdef CONFIG_NF_CT_PROTO_DCCP
struct nf_dccp_net {
- int dccp_loose;
+ u8 dccp_loose;
unsigned int dccp_timeout[CT_DCCP_MAX + 1];
};
#endif
@@ -93,18 +93,15 @@ struct ct_pcpu {
};
struct netns_ct {
- atomic_t count;
- unsigned int expect_count;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
bool ecache_dwork_pending;
#endif
- bool auto_assign_helper_warned;
- unsigned int sysctl_log_invalid; /* Log invalid packets */
- int sysctl_events;
- int sysctl_acct;
- int sysctl_auto_assign_helper;
- int sysctl_tstamp;
- int sysctl_checksum;
+ u8 sysctl_log_invalid; /* Log invalid packets */
+ u8 sysctl_events;
+ u8 sysctl_acct;
+ u8 sysctl_auto_assign_helper;
+ u8 sysctl_tstamp;
+ u8 sysctl_checksum;
struct ct_pcpu __percpu *pcpu_lists;
struct ip_conntrack_stat __percpu *stat;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 7c841037c533..aff707988e23 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -25,6 +25,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
__be32 saddr = iph->saddr;
__u8 flags;
struct net_device *dev = skb_dst(skb)->dev;
+ struct flow_keys flkeys;
unsigned int hh_len;
sk = sk_to_full_sk(sk);
@@ -48,6 +49,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
fl4.flowi4_oif = l3mdev_master_ifindex(dev);
fl4.flowi4_mark = skb->mark;
fl4.flowi4_flags = flags;
+ fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys);
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
return PTR_ERR(rt);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index ab9a279dd6d4..6ab710b5a1a8 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -24,6 +24,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct sock *sk = sk_to_full_sk(sk_partial);
+ struct flow_keys flkeys;
unsigned int hh_len;
struct dst_entry *dst;
int strict = (ipv6_addr_type(&iph->daddr) &
@@ -38,6 +39,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
};
int err;
+ fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys);
dst = ip6_route_output(net, sk, &fl6);
err = dst->error;
if (err) {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ace3e8265e0a..e0befcf8113a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -55,6 +55,8 @@
#include "nf_internals.h"
+extern unsigned int nf_conntrack_net_id;
+
__cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
EXPORT_SYMBOL_GPL(nf_conntrack_locks);
@@ -85,6 +87,8 @@ static __read_mostly bool nf_conntrack_locks_all;
static struct conntrack_gc_work conntrack_gc_work;
+extern unsigned int nf_conntrack_net_id;
+
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
{
/* 1) Acquire the lock */
@@ -1379,6 +1383,7 @@ static void gc_worker(struct work_struct *work)
i = 0;
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
+ struct nf_conntrack_net *cnet;
struct net *net;
tmp = nf_ct_tuplehash_to_ctrack(h);
@@ -1399,7 +1404,8 @@ static void gc_worker(struct work_struct *work)
continue;
net = nf_ct_net(tmp);
- if (atomic_read(&net->ct.count) < nf_conntrack_max95)
+ cnet = net_generic(net, nf_conntrack_net_id);
+ if (atomic_read(&cnet->count) < nf_conntrack_max95)
continue;
/* need to take reference to avoid possible races */
@@ -1478,17 +1484,18 @@ __nf_conntrack_alloc(struct net *net,
const struct nf_conntrack_tuple *repl,
gfp_t gfp, u32 hash)
{
+ struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ unsigned int ct_count;
struct nf_conn *ct;
/* We don't want any race condition at early drop stage */
- atomic_inc(&net->ct.count);
+ ct_count = atomic_inc_return(&cnet->count);
- if (nf_conntrack_max &&
- unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
+ if (nf_conntrack_max && unlikely(ct_count > nf_conntrack_max)) {
if (!early_drop(net, hash)) {
if (!conntrack_gc_work.early_drop)
conntrack_gc_work.early_drop = true;
- atomic_dec(&net->ct.count);
+ atomic_dec(&cnet->count);
net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
return ERR_PTR(-ENOMEM);
}
@@ -1523,7 +1530,7 @@ __nf_conntrack_alloc(struct net *net,
atomic_set(&ct->ct_general.use, 0);
return ct;
out:
- atomic_dec(&net->ct.count);
+ atomic_dec(&cnet->count);
return ERR_PTR(-ENOMEM);
}
@@ -1540,6 +1547,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
void nf_conntrack_free(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
+ struct nf_conntrack_net *cnet;
/* A freed object has refcnt == 0, that's
* the golden rule for SLAB_TYPESAFE_BY_RCU
@@ -1548,8 +1556,10 @@ void nf_conntrack_free(struct nf_conn *ct)
nf_ct_ext_destroy(ct);
kmem_cache_free(nf_conntrack_cachep, ct);
+ cnet = net_generic(net, nf_conntrack_net_id);
+
smp_mb__before_atomic();
- atomic_dec(&net->ct.count);
+ atomic_dec(&cnet->count);
}
EXPORT_SYMBOL_GPL(nf_conntrack_free);
@@ -1570,6 +1580,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_zone *zone;
struct nf_conn_timeout *timeout_ext;
struct nf_conntrack_zone tmp;
+ struct nf_conntrack_net *cnet;
if (!nf_ct_invert_tuple(&repl_tuple, tuple)) {
pr_debug("Can't invert tuple.\n");
@@ -1603,7 +1614,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
GFP_ATOMIC);
local_bh_disable();
- if (net->ct.expect_count) {
+ cnet = net_generic(net, nf_conntrack_net_id);
+ if (cnet->expect_count) {
spin_lock(&nf_conntrack_expect_lock);
exp = nf_ct_find_expectation(net, zone, tuple);
if (exp) {
@@ -2305,9 +2317,11 @@ __nf_ct_unconfirmed_destroy(struct net *net)
void nf_ct_unconfirmed_destroy(struct net *net)
{
+ struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+
might_sleep();
- if (atomic_read(&net->ct.count) > 0) {
+ if (atomic_read(&cnet->count) > 0) {
__nf_ct_unconfirmed_destroy(net);
nf_queue_nf_hook_drop(net);
synchronize_net();
@@ -2319,11 +2333,12 @@ void nf_ct_iterate_cleanup_net(struct net *net,
int (*iter)(struct nf_conn *i, void *data),
void *data, u32 portid, int report)
{
+ struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
struct iter_data d;
might_sleep();
- if (atomic_read(&net->ct.count) == 0)
+ if (atomic_read(&cnet->count) == 0)
return;
d.iter = iter;
@@ -2352,7 +2367,9 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
down_read(&net_rwsem);
for_each_net(net) {
- if (atomic_read(&net->ct.count) == 0)
+ struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+
+ if (atomic_read(&cnet->count) == 0)
continue;
__nf_ct_unconfirmed_destroy(net);
nf_queue_nf_hook_drop(net);
@@ -2432,8 +2449,10 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
i_see_dead_people:
busy = 0;
list_for_each_entry(net, net_exit_list, exit_list) {
+ struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+
nf_ct_iterate_cleanup(kill_all, net, 0, 0);
- if (atomic_read(&net->ct.count) != 0)
+ if (atomic_read(&cnet->count) != 0)
busy = 1;
}
if (busy) {
@@ -2714,12 +2733,13 @@ void nf_conntrack_init_end(void)
int nf_conntrack_init_net(struct net *net)
{
+ struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
int ret = -ENOMEM;
int cpu;
BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER);
BUILD_BUG_ON_NOT_POWER_OF_2(CONNTRACK_LOCKS);
- atomic_set(&net->ct.count, 0);
+ atomic_set(&cnet->count, 0);
net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
if (!net->ct.pcpu_lists)
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 42557d2b6a90..efdd391b3f72 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -43,18 +43,23 @@ unsigned int nf_ct_expect_max __read_mostly;
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
static unsigned int nf_ct_expect_hashrnd __read_mostly;
+extern unsigned int nf_conntrack_net_id;
+
/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
u32 portid, int report)
{
struct nf_conn_help *master_help = nfct_help(exp->master);
struct net *net = nf_ct_exp_net(exp);
+ struct nf_conntrack_net *cnet;
WARN_ON(!master_help);
WARN_ON(timer_pending(&exp->timeout));
hlist_del_rcu(&exp->hnode);
- net->ct.expect_count--;
+
+ cnet = net_generic(net, nf_conntrack_net_id);
+ cnet->expect_count--;
hlist_del_rcu(&exp->lnode);
master_help->expecting[exp->class]--;
@@ -118,10 +123,11 @@ __nf_ct_expect_find(struct net *net,
const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
+ struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
struct nf_conntrack_expect *i;
unsigned int h;
- if (!net->ct.expect_count)
+ if (!cnet->expect_count)
return NULL;
h = nf_ct_expect_dst_hash(net, tuple);
@@ -158,10 +164,11 @@ nf_ct_find_expectation(struct net *net,
const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
+ struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
struct nf_conntrack_expect *i, *exp = NULL;
unsigned int h;
- if (!net->ct.expect_count)
+ if (!cnet->expect_count)
return NULL;
h = nf_ct_expect_dst_hash(net, tuple);
@@ -368,6 +375,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_put);
static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
+ struct nf_conntrack_net *cnet;
struct nf_conn_help *master_help = nfct_help(exp->master);
struct nf_conntrack_helper *helper;
struct net *net = nf_ct_exp_net(exp);
@@ -389,7 +397,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
master_help->expecting[exp->class]++;
hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
- net->ct.expect_count++;
+ cnet = net_generic(net, nf_conntrack_net_id);
+ cnet->expect_count++;
NF_CT_STAT_INC(net, expect_create);
}
@@ -415,6 +424,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
{
const struct nf_conntrack_expect_policy *p;
struct nf_conntrack_expect *i;
+ struct nf_conntrack_net *cnet;
struct nf_conn *master = expect->master;
struct nf_conn_help *master_help = nfct_help(master);
struct nf_conntrack_helper *helper;
@@ -458,7 +468,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
}
}
- if (net->ct.expect_count >= nf_ct_expect_max) {
+ cnet = net_generic(net, nf_conntrack_net_id);
+ if (cnet->expect_count >= nf_ct_expect_max) {
net_warn_ratelimited("nf_conntrack: expectation table full\n");
ret = -EMFILE;
}
@@ -686,7 +697,6 @@ module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
int nf_conntrack_expect_pernet_init(struct net *net)
{
- net->ct.expect_count = 0;
return exp_proc_init(net);
}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index b055187235f8..ac396cc8bfae 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -43,6 +43,8 @@ MODULE_PARM_DESC(nf_conntrack_helper,
static DEFINE_MUTEX(nf_ct_nat_helpers_mutex);
static struct list_head nf_ct_nat_helpers __read_mostly;
+extern unsigned int nf_conntrack_net_id;
+
/* Stupid hash, but collision free for the default registrations of the
* helpers currently in the kernel. */
static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple)
@@ -212,8 +214,10 @@ EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add);
static struct nf_conntrack_helper *
nf_ct_lookup_helper(struct nf_conn *ct, struct net *net)
{
- if (!net->ct.sysctl_auto_assign_helper) {
- if (net->ct.auto_assign_helper_warned)
+ struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+
+ if (!cnet->sysctl_auto_assign_helper) {
+ if (cnet->auto_assign_helper_warned)
return NULL;
if (!__nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple))
return NULL;
@@ -221,7 +225,7 @@ nf_ct_lookup_helper(struct nf_conn *ct, struct net *net)
"has been turned off for security reasons and CT-based "
"firewall rule not found. Use the iptables CT target "
"to attach helpers instead.\n");
- net->ct.auto_assign_helper_warned = 1;
+ cnet->auto_assign_helper_warned = true;
return NULL;
}
@@ -556,8 +560,9 @@ static const struct nf_ct_ext_type helper_extend = {
void nf_conntrack_helper_pernet_init(struct net *net)
{
- net->ct.auto_assign_helper_warned = false;
- net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
+ struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+
+ cnet->sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
}
int nf_conntrack_helper_init(void)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c67a6ec22a74..44e3cb80e2e0 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2559,9 +2559,9 @@ static int
ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct net *net)
{
- struct nlmsghdr *nlh;
unsigned int flags = portid ? NLM_F_MULTI : 0, event;
- unsigned int nr_conntracks = atomic_read(&net->ct.count);
+ unsigned int nr_conntracks;
+ struct nlmsghdr *nlh;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS);
nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
@@ -2569,6 +2569,7 @@ ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
if (!nlh)
goto nlmsg_failure;
+ nr_conntracks = nf_conntrack_count(net);
if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks)))
goto nla_put_failure;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index ec23330687a5..318b8f723349 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -31,20 +31,6 @@
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
-/* "Be conservative in what you do,
- be liberal in what you accept from others."
- If it's non-zero, we mark only out of window RST segments as INVALID. */
-static int nf_ct_tcp_be_liberal __read_mostly = 0;
-
-/* If it is set to zero, we disable picking up already established
- connections. */
-static int nf_ct_tcp_loose __read_mostly = 1;
-
-/* Max number of the retransmitted packets without receiving an (acceptable)
- ACK from the destination. If this number is reached, a shorter timer
- will be started. */
-static int nf_ct_tcp_max_retrans __read_mostly = 3;
-
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR */
@@ -1436,9 +1422,23 @@ void nf_conntrack_tcp_init_net(struct net *net)
* ->timeouts[0] contains 'new' timeout, like udp or icmp.
*/
tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT];
- tn->tcp_loose = nf_ct_tcp_loose;
- tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
- tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
+
+ /* If it is set to zero, we disable picking up already established
+ * connections.
+ */
+ tn->tcp_loose = 1;
+
+ /* "Be conservative in what you do,
+ * be liberal in what you accept from others."
+ * If it's non-zero, we mark only out of window RST segments as INVALID.
+ */
+ tn->tcp_be_liberal = 0;
+
+ /* Max number of the retransmitted packets without receiving an (acceptable)
+ * ACK from the destination. If this number is reached, a shorter timer
+ * will be started.
+ */
+ tn->tcp_max_retrans = 3;
}
const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 67f7d90563dd..aaa55246d0ca 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -425,14 +425,16 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
static int ct_cpu_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq_file_net(seq);
- unsigned int nr_conntracks = atomic_read(&net->ct.count);
const struct ip_conntrack_stat *st = v;
+ unsigned int nr_conntracks;
if (v == SEQ_START_TOKEN) {
seq_puts(seq, "entries clashres found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
return 0;
}
+ nr_conntracks = nf_conntrack_count(net);
+
seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
"%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
nr_conntracks,
@@ -508,13 +510,19 @@ static void nf_conntrack_standalone_fini_proc(struct net *net)
}
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
+u32 nf_conntrack_count(const struct net *net)
+{
+ const struct nf_conntrack_net *cnet;
+
+ cnet = net_generic(net, nf_conntrack_net_id);
+
+ return atomic_read(&cnet->count);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_count);
+
/* Sysctl support */
#ifdef CONFIG_SYSCTL
-/* Log invalid packets of a given protocol */
-static int log_invalid_proto_min __read_mostly;
-static int log_invalid_proto_max __read_mostly = 255;
-
/* size the user *wants to set */
static unsigned int nf_conntrack_htable_size_user __read_mostly;
@@ -615,7 +623,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
},
[NF_SYSCTL_CT_COUNT] = {
.procname = "nf_conntrack_count",
- .data = &init_net.ct.count,
.maxlen = sizeof(int),
.mode = 0444,
.proc_handler = proc_dointvec,
@@ -630,20 +637,18 @@ static struct ctl_table nf_ct_sysctl_table[] = {
[NF_SYSCTL_CT_CHECKSUM] = {
.procname = "nf_conntrack_checksum",
.data = &init_net.ct.sysctl_checksum,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
[NF_SYSCTL_CT_LOG_INVALID] = {
.procname = "nf_conntrack_log_invalid",
.data = &init_net.ct.sysctl_log_invalid,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &log_invalid_proto_min,
- .extra2 = &log_invalid_proto_max,
+ .proc_handler = proc_dou8vec_minmax,
},
[NF_SYSCTL_CT_EXPECT_MAX] = {
.procname = "nf_conntrack_expect_max",
@@ -655,18 +660,17 @@ static struct ctl_table nf_ct_sysctl_table[] = {
[NF_SYSCTL_CT_ACCT] = {
.procname = "nf_conntrack_acct",
.data = &init_net.ct.sysctl_acct,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
[NF_SYSCTL_CT_HELPER] = {
.procname = "nf_conntrack_helper",
- .data = &init_net.ct.sysctl_auto_assign_helper,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
@@ -674,9 +678,9 @@ static struct ctl_table nf_ct_sysctl_table[] = {
[NF_SYSCTL_CT_EVENTS] = {
.procname = "nf_conntrack_events",
.data = &init_net.ct.sysctl_events,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
@@ -685,9 +689,9 @@ static struct ctl_table nf_ct_sysctl_table[] = {
[NF_SYSCTL_CT_TIMESTAMP] = {
.procname = "nf_conntrack_timestamp",
.data = &init_net.ct.sysctl_tstamp,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
@@ -760,25 +764,25 @@ static struct ctl_table nf_ct_sysctl_table[] = {
},
[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
.procname = "nf_conntrack_tcp_loose",
- .maxlen = sizeof(int),
+ .maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
[NF_SYSCTL_CT_PROTO_TCP_LIBERAL] = {
.procname = "nf_conntrack_tcp_be_liberal",
- .maxlen = sizeof(int),
+ .maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
[NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = {
.procname = "nf_conntrack_tcp_max_retrans",
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dou8vec_minmax,
},
[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP] = {
.procname = "nf_conntrack_udp_timeout",
@@ -905,9 +909,9 @@ static struct ctl_table nf_ct_sysctl_table[] = {
},
[NF_SYSCTL_CT_PROTO_DCCP_LOOSE] = {
.procname = "nf_conntrack_dccp_loose",
- .maxlen = sizeof(int),
+ .maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
@@ -1039,11 +1043,11 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
if (!table)
return -ENOMEM;
- table[NF_SYSCTL_CT_COUNT].data = &net->ct.count;
+ table[NF_SYSCTL_CT_COUNT].data = &cnet->count;
table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum;
table[NF_SYSCTL_CT_LOG_INVALID].data = &net->ct.sysctl_log_invalid;
table[NF_SYSCTL_CT_ACCT].data = &net->ct.sysctl_acct;
- table[NF_SYSCTL_CT_HELPER].data = &net->ct.sysctl_auto_assign_helper;
+ table[NF_SYSCTL_CT_HELPER].data = &cnet->sysctl_auto_assign_helper;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events;
#endif
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 76573bae6664..39c02d1aeedf 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -130,6 +130,9 @@ static int flow_offload_fill_route(struct flow_offload *flow,
flow_tuple->dst_cache = dst;
flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
}
flow_tuple->xmit_type = route->tuple[dir].xmit_type;
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 4f1a145ff74b..2af7bdb38407 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -78,6 +78,16 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
match->dissector.used_keys |= enc_keys;
}
+static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
+ struct flow_dissector_key_vlan *mask,
+ u16 vlan_id, __be16 proto)
+{
+ key->vlan_id = vlan_id;
+ mask->vlan_id = VLAN_VID_MASK;
+ key->vlan_tpid = proto;
+ mask->vlan_tpid = 0xffff;
+}
+
static int nf_flow_rule_match(struct nf_flow_match *match,
const struct flow_offload_tuple *tuple,
struct dst_entry *other_dst)
@@ -85,6 +95,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
struct nf_flow_key *mask = &match->mask;
struct nf_flow_key *key = &match->key;
struct ip_tunnel_info *tun_info;
+ bool vlan_encap = false;
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
@@ -102,6 +113,32 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
key->meta.ingress_ifindex = tuple->iifidx;
mask->meta.ingress_ifindex = 0xffffffff;
+ if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
+ tuple->encap[0].proto == htons(ETH_P_8021Q)) {
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
+ tuple->encap[0].id,
+ tuple->encap[0].proto);
+ vlan_encap = true;
+ }
+
+ if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
+ tuple->encap[1].proto == htons(ETH_P_8021Q)) {
+ if (vlan_encap) {
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
+ cvlan);
+ nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
+ tuple->encap[1].id,
+ tuple->encap[1].proto);
+ } else {
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
+ vlan);
+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
+ tuple->encap[1].id,
+ tuple->encap[1].proto);
+ }
+ }
+
switch (tuple->l3proto) {
case AF_INET:
key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -582,6 +619,7 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
struct nf_flow_rule *flow_rule)
{
const struct flow_offload_tuple *other_tuple;
+ const struct flow_offload_tuple *tuple;
int i;
flow_offload_decap_tunnel(flow, dir, flow_rule);
@@ -591,6 +629,20 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
return -1;
+ tuple = &flow->tuplehash[dir].tuple;
+
+ for (i = 0; i < tuple->encap_num; i++) {
+ struct flow_action_entry *entry;
+
+ if (tuple->in_vlan_ingress & BIT(i))
+ continue;
+
+ if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
+ entry = flow_action_entry_next(flow_rule);
+ entry->id = FLOW_ACTION_VLAN_POP;
+ }
+ }
+
other_tuple = &flow->tuplehash[!dir].tuple;
for (i = 0; i < other_tuple->encap_num; i++) {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index bd581fbe5ce4..357443b3c0e4 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2878,6 +2878,9 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
goto nla_put_failure;
}
+ if (chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_stats(chain, rule);
+
list = nla_nest_start_noflag(skb, NFTA_RULE_EXPRESSIONS);
if (list == NULL)
goto nla_put_failure;
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 43b56eff3b04..19215e81dd66 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -47,6 +47,48 @@ void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow,
offsetof(struct nft_flow_key, control);
}
+struct nft_offload_ethertype {
+ __be16 value;
+ __be16 mask;
+};
+
+static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow)
+{
+ struct nft_flow_match *match = &flow->match;
+ struct nft_offload_ethertype ethertype;
+
+ if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL) &&
+ match->key.basic.n_proto != htons(ETH_P_8021Q) &&
+ match->key.basic.n_proto != htons(ETH_P_8021AD))
+ return;
+
+ ethertype.value = match->key.basic.n_proto;
+ ethertype.mask = match->mask.basic.n_proto;
+
+ if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_VLAN) &&
+ (match->key.vlan.vlan_tpid == htons(ETH_P_8021Q) ||
+ match->key.vlan.vlan_tpid == htons(ETH_P_8021AD))) {
+ match->key.basic.n_proto = match->key.cvlan.vlan_tpid;
+ match->mask.basic.n_proto = match->mask.cvlan.vlan_tpid;
+ match->key.cvlan.vlan_tpid = match->key.vlan.vlan_tpid;
+ match->mask.cvlan.vlan_tpid = match->mask.vlan.vlan_tpid;
+ match->key.vlan.vlan_tpid = ethertype.value;
+ match->mask.vlan.vlan_tpid = ethertype.mask;
+ match->dissector.offset[FLOW_DISSECTOR_KEY_CVLAN] =
+ offsetof(struct nft_flow_key, cvlan);
+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CVLAN);
+ } else {
+ match->key.basic.n_proto = match->key.vlan.vlan_tpid;
+ match->mask.basic.n_proto = match->mask.vlan.vlan_tpid;
+ match->key.vlan.vlan_tpid = ethertype.value;
+ match->mask.vlan.vlan_tpid = ethertype.mask;
+ match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] =
+ offsetof(struct nft_flow_key, vlan);
+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_VLAN);
+ }
+}
+
struct nft_flow_rule *nft_flow_rule_create(struct net *net,
const struct nft_rule *rule)
{
@@ -91,6 +133,8 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net,
expr = nft_expr_next(expr);
}
+ nft_flow_rule_transfer_vlan(ctx, flow);
+
flow->proto = ctx->dep.l3num;
kfree(ctx);
@@ -199,26 +243,56 @@ static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
cls_flow->rule = flow->rule;
}
-static int nft_flow_offload_rule(struct nft_chain *chain,
- struct nft_rule *rule,
- struct nft_flow_rule *flow,
- enum flow_cls_command command)
+static int nft_flow_offload_cmd(const struct nft_chain *chain,
+ const struct nft_rule *rule,
+ struct nft_flow_rule *flow,
+ enum flow_cls_command command,
+ struct flow_cls_offload *cls_flow)
{
struct netlink_ext_ack extack = {};
- struct flow_cls_offload cls_flow;
struct nft_base_chain *basechain;
if (!nft_is_base_chain(chain))
return -EOPNOTSUPP;
basechain = nft_base_chain(chain);
- nft_flow_cls_offload_setup(&cls_flow, basechain, rule, flow, &extack,
+ nft_flow_cls_offload_setup(cls_flow, basechain, rule, flow, &extack,
command);
- return nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow,
+ return nft_setup_cb_call(TC_SETUP_CLSFLOWER, cls_flow,
&basechain->flow_block.cb_list);
}
+static int nft_flow_offload_rule(const struct nft_chain *chain,
+ struct nft_rule *rule,
+ struct nft_flow_rule *flow,
+ enum flow_cls_command command)
+{
+ struct flow_cls_offload cls_flow;
+
+ return nft_flow_offload_cmd(chain, rule, flow, command, &cls_flow);
+}
+
+int nft_flow_rule_stats(const struct nft_chain *chain,
+ const struct nft_rule *rule)
+{
+ struct flow_cls_offload cls_flow = {};
+ struct nft_expr *expr, *next;
+ int err;
+
+ err = nft_flow_offload_cmd(chain, rule, NULL, FLOW_CLS_STATS,
+ &cls_flow);
+ if (err < 0)
+ return err;
+
+ nft_rule_for_each_expr(expr, next, rule) {
+ if (expr->ops->offload_stats)
+ expr->ops->offload_stats(expr, &cls_flow.stats);
+ }
+
+ return 0;
+}
+
static int nft_flow_offload_bind(struct flow_block_offload *bo,
struct nft_base_chain *basechain)
{
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index eb6a43a180bb..47b6d05f1ae6 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -114,19 +114,56 @@ nla_put_failure:
return -1;
}
+union nft_cmp_offload_data {
+ u16 val16;
+ u32 val32;
+ u64 val64;
+};
+
+static void nft_payload_n2h(union nft_cmp_offload_data *data,
+ const u8 *val, u32 len)
+{
+ switch (len) {
+ case 2:
+ data->val16 = ntohs(*((u16 *)val));
+ break;
+ case 4:
+ data->val32 = ntohl(*((u32 *)val));
+ break;
+ case 8:
+ data->val64 = be64_to_cpu(*((u64 *)val));
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+}
+
static int __nft_cmp_offload(struct nft_offload_ctx *ctx,
struct nft_flow_rule *flow,
const struct nft_cmp_expr *priv)
{
struct nft_offload_reg *reg = &ctx->regs[priv->sreg];
+ union nft_cmp_offload_data _data, _datamask;
u8 *mask = (u8 *)&flow->match.mask;
u8 *key = (u8 *)&flow->match.key;
+ u8 *data, *datamask;
if (priv->op != NFT_CMP_EQ || priv->len > reg->len)
return -EOPNOTSUPP;
- memcpy(key + reg->offset, &priv->data, reg->len);
- memcpy(mask + reg->offset, &reg->mask, reg->len);
+ if (reg->flags & NFT_OFFLOAD_F_NETWORK2HOST) {
+ nft_payload_n2h(&_data, (u8 *)&priv->data, reg->len);
+ nft_payload_n2h(&_datamask, (u8 *)&reg->mask, reg->len);
+ data = (u8 *)&_data;
+ datamask = (u8 *)&_datamask;
+ } else {
+ data = (u8 *)&priv->data;
+ datamask = (u8 *)&reg->mask;
+ }
+
+ memcpy(key + reg->offset, data, reg->len);
+ memcpy(mask + reg->offset, datamask, reg->len);
flow->match.dissector.used_keys |= BIT(reg->key);
flow->match.dissector.offset[reg->key] = reg->base_offset;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 85ed461ec24e..8edd3b3c173d 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -13,6 +13,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
struct nft_counter {
s64 bytes;
@@ -248,6 +249,32 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
return 0;
}
+static int nft_counter_offload(struct nft_offload_ctx *ctx,
+ struct nft_flow_rule *flow,
+ const struct nft_expr *expr)
+{
+ /* No specific offload action is needed, but report success. */
+ return 0;
+}
+
+static void nft_counter_offload_stats(struct nft_expr *expr,
+ const struct flow_stats *stats)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+ struct nft_counter *this_cpu;
+ seqcount_t *myseq;
+
+ preempt_disable();
+ this_cpu = this_cpu_ptr(priv->counter);
+ myseq = this_cpu_ptr(&nft_counter_seq);
+
+ write_seqcount_begin(myseq);
+ this_cpu->packets += stats->pkts;
+ this_cpu->bytes += stats->bytes;
+ write_seqcount_end(myseq);
+ preempt_enable();
+}
+
static struct nft_expr_type nft_counter_type;
static const struct nft_expr_ops nft_counter_ops = {
.type = &nft_counter_type,
@@ -258,6 +285,8 @@ static const struct nft_expr_ops nft_counter_ops = {
.destroy_clone = nft_counter_destroy,
.dump = nft_counter_dump,
.clone = nft_counter_clone,
+ .offload = nft_counter_offload,
+ .offload_stats = nft_counter_offload_stats,
};
static struct nft_expr_type nft_counter_type __read_mostly = {
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index cb1c8c231880..501c5b24cc39 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -226,8 +226,9 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
return -EOPNOTSUPP;
- NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan,
- vlan_tci, sizeof(__be16), reg);
+ NFT_OFFLOAD_MATCH_FLAGS(FLOW_DISSECTOR_KEY_VLAN, vlan,
+ vlan_tci, sizeof(__be16), reg,
+ NFT_OFFLOAD_F_NETWORK2HOST);
break;
case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto):
if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
@@ -241,16 +242,18 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
return -EOPNOTSUPP;
- NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan,
- vlan_tci, sizeof(__be16), reg);
+ NFT_OFFLOAD_MATCH_FLAGS(FLOW_DISSECTOR_KEY_CVLAN, cvlan,
+ vlan_tci, sizeof(__be16), reg,
+ NFT_OFFLOAD_F_NETWORK2HOST);
break;
case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto) +
sizeof(struct vlan_hdr):
if (!nft_payload_offload_mask(reg, priv->len, sizeof(__be16)))
return -EOPNOTSUPP;
- NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan,
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, cvlan,
vlan_tpid, sizeof(__be16), reg);
+ nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK);
break;
default:
return -EOPNOTSUPP;
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 2b5707738609..76d9487fb03c 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -9,7 +9,7 @@ ret=0
ksft_skip=4
# all tests in this script. Can be overridden with -t option
-TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr"
+TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr ipv4_mangle ipv6_mangle"
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -1653,6 +1653,154 @@ ipv4_route_v6_gw_test()
route_cleanup
}
+socat_check()
+{
+ if [ ! -x "$(command -v socat)" ]; then
+ echo "socat command not found. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+iptables_check()
+{
+ iptables -t mangle -L OUTPUT &> /dev/null
+ if [ $? -ne 0 ]; then
+ echo "iptables configuration not supported. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+ip6tables_check()
+{
+ ip6tables -t mangle -L OUTPUT &> /dev/null
+ if [ $? -ne 0 ]; then
+ echo "ip6tables configuration not supported. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+ipv4_mangle_test()
+{
+ local rc
+
+ echo
+ echo "IPv4 mangling tests"
+
+ socat_check || return 1
+ iptables_check || return 1
+
+ route_setup
+ sleep 2
+
+ local tmp_file=$(mktemp)
+ ip netns exec ns2 socat UDP4-LISTEN:54321,fork $tmp_file &
+
+ # Add a FIB rule and a route that will direct our connection to the
+ # listening server.
+ $IP rule add pref 100 ipproto udp sport 12345 dport 54321 table 123
+ $IP route add table 123 172.16.101.0/24 dev veth1
+
+ # Add an unreachable route to the main table that will block our
+ # connection in case the FIB rule is not hit.
+ $IP route add unreachable 172.16.101.2/32
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters"
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=11111"
+ log_test $? 1 " Connection with incorrect parameters"
+
+ # Add a mangling rule and make sure connection is still successful.
+ $NS_EXEC iptables -t mangle -A OUTPUT -j MARK --set-mark 1
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters - mangling"
+
+ # Delete the mangling rule and make sure connection is still
+ # successful.
+ $NS_EXEC iptables -t mangle -D OUTPUT -j MARK --set-mark 1
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters - no mangling"
+
+ # Verify connections were indeed successful on server side.
+ [[ $(cat $tmp_file | wc -l) -eq 3 ]]
+ log_test $? 0 " Connection check - server side"
+
+ $IP route del unreachable 172.16.101.2/32
+ $IP route del table 123 172.16.101.0/24 dev veth1
+ $IP rule del pref 100
+
+ { kill %% && wait %%; } 2>/dev/null
+ rm $tmp_file
+
+ route_cleanup
+}
+
+ipv6_mangle_test()
+{
+ local rc
+
+ echo
+ echo "IPv6 mangling tests"
+
+ socat_check || return 1
+ ip6tables_check || return 1
+
+ route_setup
+ sleep 2
+
+ local tmp_file=$(mktemp)
+ ip netns exec ns2 socat UDP6-LISTEN:54321,fork $tmp_file &
+
+ # Add a FIB rule and a route that will direct our connection to the
+ # listening server.
+ $IP -6 rule add pref 100 ipproto udp sport 12345 dport 54321 table 123
+ $IP -6 route add table 123 2001:db8:101::/64 dev veth1
+
+ # Add an unreachable route to the main table that will block our
+ # connection in case the FIB rule is not hit.
+ $IP -6 route add unreachable 2001:db8:101::2/128
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters"
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=11111"
+ log_test $? 1 " Connection with incorrect parameters"
+
+ # Add a mangling rule and make sure connection is still successful.
+ $NS_EXEC ip6tables -t mangle -A OUTPUT -j MARK --set-mark 1
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters - mangling"
+
+ # Delete the mangling rule and make sure connection is still
+ # successful.
+ $NS_EXEC ip6tables -t mangle -D OUTPUT -j MARK --set-mark 1
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters - no mangling"
+
+ # Verify connections were indeed successful on server side.
+ [[ $(cat $tmp_file | wc -l) -eq 3 ]]
+ log_test $? 0 " Connection check - server side"
+
+ $IP -6 route del unreachable 2001:db8:101::2/128
+ $IP -6 route del table 123 2001:db8:101::/64 dev veth1
+ $IP -6 rule del pref 100
+
+ { kill %% && wait %%; } 2>/dev/null
+ rm $tmp_file
+
+ route_cleanup
+}
+
################################################################################
# usage
@@ -1725,6 +1873,8 @@ do
ipv6_route_metrics) ipv6_route_metrics_test;;
ipv4_route_metrics) ipv4_route_metrics_test;;
ipv4_route_v6_gw) ipv4_route_v6_gw_test;;
+ ipv4_mangle) ipv4_mangle_test;;
+ ipv6_mangle) ipv6_mangle_test;;
help) echo "Test names: $TESTS"; exit 0;;
esac