summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2013-10-17 23:22:05 +0400
committerDavid S. Miller <davem@davemloft.net>2013-10-17 23:22:05 +0400
commitda33edccebcc36d387423dcdb557094fbda55994 (patch)
tree9f426a52f875169ae24e54a395beedc697c96e02 /net
parent78dea8cc4942c6adbcccc8f483463906a078f039 (diff)
parented683f138b3dbc8a5e878e24a0bfa0bb61043a09 (diff)
downloadlinux-da33edccebcc36d387423dcdb557094fbda55994.tar.xz
Merge branch 'net-next' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nftables
Pablo Neira Ayuso says: ==================== netfilter updates: nf_tables pull request The following patchset contains the current original nf_tables tree condensed in 17 patches. I have organized them by chronological order since the original nf_tables code was released in 2009 and by dependencies between the different patches. The patches are: 1) Adapt all existing hooks in the tree to pass hook ops to the hook callback function, required by nf_tables, from Patrick McHardy. 2) Move alloc_null_binding to nf_nat_core, as it is now also needed by nf_tables and ip_tables, original patch from Patrick McHardy but required major changes to adapt it to the current tree that I made. 3) Add nf_tables core, including the netlink API, the packet filtering engine, expressions and built-in tables, from Patrick McHardy. This patch includes accumulated fixes since 2009 and minor enhancements. The patch description contains a list of references to the original patches for the record. For those that are not familiar with the original work, see [1], [2] and [3]. 4) Add netlink set API, this replaces the original set infrastructure to introduce a netlink API to add/delete sets and to add/delete set elements. This includes two set types: the hash and the rb-tree sets (used for interval based matching). The main difference with ipset is that this infrastructure is data type agnostic. Patch from Patrick McHardy. 5) Allow expression operation overload, this API change allows us to define expression subtypes depending on the configuration that is received from user-space via Netlink. It is used by follow up patches to provide optimized versions of the payload and cmp expressions and the x_tables compatibility layer, from Patrick McHardy. 6) Add optimized data comparison operation, it requires the previous patch, from Patrick McHardy. 7) Add optimized payload implementation, it requires patch 5, from Patrick McHardy. 8) Convert built-in tables to chain types. 
Each chain type has special semantics (filter, route and nat) that are used by userspace to configure the chain behaviour. The main change regarding iptables is that tables become containers of chains, with no specific semantics. However, you may still configure your tables and chains to retain iptables-like semantics, patch from me. 9) Add compatibility layer for x_tables. This patch adds support to use all existing x_tables extensions from nf_tables, this is used to provide a userspace utility that accepts iptables syntax but uses the nf_tables kernel core internally. This patch includes missing features in the nf_tables core such as the per-chain stats, default chain policy and number of chain references, which are required by the iptables compatibility userspace tool. Patch from me. 10) Fix transport protocol matching, this fix is a side effect of the x_tables compatibility layer, which now provides a pointer to the transport header, from me. 11) Add support for dormant tables, this feature allows you to disable all chains and rules that are contained in one table, from me. 12) Add IPv6 NAT support. At the time nf_tables was made, there was no NAT IPv6 support yet, from Tomasz Bursztyka. 13) Complete net namespace support. This patch registers the protocol family per net namespace, so tables (thus, other objects contained in tables such as sets, chains and rules) are only visible from the corresponding net namespace, from me. 14) Add the insert operation to the nf_tables netlink API, this requires adding a new position attribute that allows us to locate where in the ruleset a rule needs to be inserted, from Eric Leblond. 15) Add rule batching support, including atomic rule-set updates by using rule-set generations. This patch includes a change to nfnetlink to include two new control messages to indicate the beginning and the end of a batch. 
The end message is interpreted as the commit message, if it's missing, then the rule-set updates contained in the batch are aborted, from me. 16) Add trace support to the nf_tables packet filtering core, from me. 17) Add ARP filtering support, original patch from Patrick McHardy, but adapted to fit into the chain type infrastructure. This was recovered to be used by the nft userspace tool and our compatibility arptables userspace tool. There is still work to do to fully replace x_tables [4] [5] but that can be done incrementally by extending our netlink API. Moreover, looking at netfilter-devel and the amount of contributions to nf_tables we've been getting, I think it would be good to have it mainstream to avoid accumulating large patchsets and to skip continuous rebases. I tried to provide a reasonable patchset, we have more than 100 accumulated patches in the original nf_tables tree, so I collapsed many of the small fixes to the main patch we had since 2009 and provide a small batch for review to netdev, while trying to retain part of the history. For those who didn't give a try to nf_tables yet, there's a quick howto available from Eric Leblond that describes how to get things working [6]. Comments/reviews welcome. Thanks! [1] http://lwn.net/Articles/324251/ [2] http://workshop.netfilter.org/2013/wiki/images/e/ee/Nftables-osd-2013-developer.pdf [3] http://lwn.net/Articles/564095/ [4] http://people.netfilter.org/pablo/map-pending-work.txt [5] http://people.netfilter.org/pablo/nftables-todo.txt [6] https://home.regit.org/netfilter-en/nftables-quick-howto/ ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/bridge/br_netfilter.c22
-rw-r--r--net/bridge/netfilter/Kconfig3
-rw-r--r--net/bridge/netfilter/Makefile2
-rw-r--r--net/bridge/netfilter/ebtable_filter.c16
-rw-r--r--net/bridge/netfilter/ebtable_nat.c16
-rw-r--r--net/bridge/netfilter/nf_tables_bridge.c65
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c2
-rw-r--r--net/ipv4/netfilter/Kconfig21
-rw-r--r--net/ipv4/netfilter/Makefile6
-rw-r--r--net/ipv4/netfilter/arptable_filter.c5
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c2
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c2
-rw-r--r--net/ipv4/netfilter/iptable_filter.c7
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c10
-rw-r--r--net/ipv4/netfilter/iptable_nat.c26
-rw-r--r--net/ipv4/netfilter/iptable_raw.c6
-rw-r--r--net/ipv4/netfilter/iptable_security.c7
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c12
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c6
-rw-r--r--net/ipv4/netfilter/nf_tables_arp.c102
-rw-r--r--net/ipv4/netfilter/nf_tables_ipv4.c128
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c205
-rw-r--r--net/ipv4/netfilter/nft_chain_route_ipv4.c90
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c123
-rw-r--r--net/ipv6/netfilter/Kconfig13
-rw-r--r--net/ipv6/netfilter/Makefile5
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c2
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c5
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c10
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c27
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c5
-rw-r--r--net/ipv6/netfilter/ip6table_security.c5
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c14
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c6
-rw-r--r--net/ipv6/netfilter/nf_tables_ipv6.c127
-rw-r--r--net/ipv6/netfilter/nft_chain_nat_ipv6.c211
-rw-r--r--net/ipv6/netfilter/nft_chain_route_ipv6.c88
-rw-r--r--net/netfilter/Kconfig52
-rw-r--r--net/netfilter/Makefile18
-rw-r--r--net/netfilter/core.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c42
-rw-r--r--net/netfilter/nf_nat_core.c20
-rw-r--r--net/netfilter/nf_tables_api.c3275
-rw-r--r--net/netfilter/nf_tables_core.c270
-rw-r--r--net/netfilter/nfnetlink.c175
-rw-r--r--net/netfilter/nft_bitwise.c146
-rw-r--r--net/netfilter/nft_byteorder.c173
-rw-r--r--net/netfilter/nft_cmp.c223
-rw-r--r--net/netfilter/nft_compat.c768
-rw-r--r--net/netfilter/nft_counter.c113
-rw-r--r--net/netfilter/nft_ct.c258
-rw-r--r--net/netfilter/nft_expr_template.c94
-rw-r--r--net/netfilter/nft_exthdr.c133
-rw-r--r--net/netfilter/nft_hash.c231
-rw-r--r--net/netfilter/nft_immediate.c132
-rw-r--r--net/netfilter/nft_limit.c119
-rw-r--r--net/netfilter/nft_log.c146
-rw-r--r--net/netfilter/nft_lookup.c141
-rw-r--r--net/netfilter/nft_meta.c228
-rw-r--r--net/netfilter/nft_meta_target.c117
-rw-r--r--net/netfilter/nft_nat.c220
-rw-r--r--net/netfilter/nft_payload.c160
-rw-r--r--net/netfilter/nft_rbtree.c247
63 files changed, 8785 insertions, 120 deletions
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index f87736270eaa..878f008afefa 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -619,7 +619,7 @@ bad:
/* Replicate the checks that IPv6 does on packet reception and pass the packet
* to ip6tables, which doesn't support NAT, so things are fairly simple. */
-static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
+static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -669,7 +669,8 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
* receiving device) to make netfilter happy, the REDIRECT
* target in particular. Save the original destination IP
* address to be able to detect DNAT afterwards. */
-static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -691,7 +692,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
return NF_ACCEPT;
nf_bridge_pull_encap_header_rcsum(skb);
- return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
+ return br_nf_pre_routing_ipv6(ops, skb, in, out, okfn);
}
if (!brnf_call_iptables && !br->nf_call_iptables)
@@ -727,7 +728,8 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
* took place when the packet entered the bridge), but we
* register an IPv4 PRE_ROUTING 'sabotage' hook that will
* prevent this from happening. */
-static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_local_in(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -765,7 +767,8 @@ static int br_nf_forward_finish(struct sk_buff *skb)
* but we are still able to filter on the 'real' indev/outdev
* because of the physdev module. For ARP, indev and outdev are the
* bridge ports. */
-static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -818,7 +821,8 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
return NF_STOLEN;
}
-static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -878,7 +882,8 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb)
#endif
/* PF_BRIDGE/POST_ROUTING ********************************************/
-static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
+static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -923,7 +928,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
/* IP/SABOTAGE *****************************************************/
/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
* for the second time. */
-static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb,
+static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index a9aff9c7d027..68f8128147be 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -1,6 +1,9 @@
#
# Bridge netfilter configuration
#
+#
+config NF_TABLES_BRIDGE
+ tristate "Ethernet Bridge nf_tables support"
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 0718699540b0..ea7629f58b3d 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -2,6 +2,8 @@
# Makefile for the netfilter modules for Link Layer filtering on a bridge.
#
+obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
+
obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
# tables
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 94b2b700cff8..bb2da7b706e7 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -60,17 +60,21 @@ static const struct ebt_table frame_filter =
};
static unsigned int
-ebt_in_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
- const struct net_device *out, int (*okfn)(struct sk_buff *))
+ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_filter);
+ return ebt_do_table(ops->hooknum, skb, in, out,
+ dev_net(in)->xt.frame_filter);
}
static unsigned int
-ebt_out_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
- const struct net_device *out, int (*okfn)(struct sk_buff *))
+ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_filter);
+ return ebt_do_table(ops->hooknum, skb, in, out,
+ dev_net(out)->xt.frame_filter);
}
static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 322555acdd40..bd238f1f105b 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -60,17 +60,21 @@ static struct ebt_table frame_nat =
};
static unsigned int
-ebt_nat_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in
- , const struct net_device *out, int (*okfn)(struct sk_buff *))
+ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_nat);
+ return ebt_do_table(ops->hooknum, skb, in, out,
+ dev_net(in)->xt.frame_nat);
}
static unsigned int
-ebt_nat_out(unsigned int hook, struct sk_buff *skb, const struct net_device *in
- , const struct net_device *out, int (*okfn)(struct sk_buff *))
+ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_nat);
+ return ebt_do_table(ops->hooknum, skb, in, out,
+ dev_net(out)->xt.frame_nat);
}
static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
new file mode 100644
index 000000000000..e8cb016fa34d
--- /dev/null
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter_bridge.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * Template nf_tables address-family descriptor for the bridge family.
+ * nf_tables_bridge_init_net() copies it into a per-netns allocation, so
+ * this object itself is never registered.
+ */
+static struct nft_af_info nft_af_bridge __read_mostly = {
+ .family = NFPROTO_BRIDGE,
+ .nhooks = NF_BR_NUMHOOKS,
+ .owner = THIS_MODULE,
+};
+
+/*
+ * Per-netns init: allocate a private copy of the nft_af_bridge template
+ * and register it for @net.
+ *
+ * Returns 0 on success, -ENOMEM if the copy cannot be allocated, or the
+ * negative value reported by nft_register_afinfo() (presumably an errno;
+ * confirm against its definition). On failure the copy is freed and
+ * net->nft.bridge is reset to NULL so no dangling pointer is left behind.
+ */
+static int nf_tables_bridge_init_net(struct net *net)
+{
+	int ret;
+
+	net->nft.bridge = kmalloc(sizeof(nft_af_bridge), GFP_KERNEL);
+	if (net->nft.bridge == NULL)
+		return -ENOMEM;
+
+	memcpy(net->nft.bridge, &nft_af_bridge, sizeof(nft_af_bridge));
+
+	ret = nft_register_afinfo(net, net->nft.bridge);
+	if (ret < 0)
+		goto err_free;
+
+	return 0;
+
+err_free:
+	kfree(net->nft.bridge);
+	net->nft.bridge = NULL;
+	/* Report the real failure instead of masking it as -ENOMEM. */
+	return ret;
+}
+
+/* Per-netns teardown: unregister this namespace's bridge afinfo, then free it. */
+static void nf_tables_bridge_exit_net(struct net *net)
+{
+	struct nft_af_info *afi = net->nft.bridge;
+
+	nft_unregister_afinfo(afi);
+	kfree(afi);
+}
+
+/* Per-network-namespace callbacks: init_net on setup, exit_net on teardown. */
+static struct pernet_operations nf_tables_bridge_net_ops = {
+ .init = nf_tables_bridge_init_net,
+ .exit = nf_tables_bridge_exit_net,
+};
+
+/*
+ * Module load: register the bridge per-netns operations and hand back
+ * whatever register_pernet_subsys() reports.
+ */
+static int __init nf_tables_bridge_init(void)
+{
+	int ret = register_pernet_subsys(&nf_tables_bridge_net_ops);
+
+	return ret;
+}
+
+/*
+ * Module unload: drop the bridge per-netns operations.
+ *
+ * This function returns void, so the call is made as a plain statement;
+ * "return <expression>;" in a void function is not valid ISO C.
+ */
+static void __exit nf_tables_bridge_exit(void)
+{
+	unregister_pernet_subsys(&nf_tables_bridge_net_ops);
+}
+
+/* Module entry and exit points. */
+module_init(nf_tables_bridge_init);
+module_exit(nf_tables_bridge_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+/* NOTE(review): presumably lets this module be autoloaded by family - confirm in nf_tables.h */
+MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE);
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 2a7efe388344..e83015cecfa7 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -87,7 +87,7 @@ static void dnrmg_send_peer(struct sk_buff *skb)
}
-static unsigned int dnrmg_hook(unsigned int hook,
+static unsigned int dnrmg_hook(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 1657e39b291f..40d56073cd19 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -36,6 +36,27 @@ config NF_CONNTRACK_PROC_COMPAT
If unsure, say Y.
+config NF_TABLES_IPV4
+ depends on NF_TABLES
+ tristate "IPv4 nf_tables support"
+
+config NFT_REJECT_IPV4
+ depends on NF_TABLES_IPV4
+ tristate "nf_tables IPv4 reject support"
+
+config NFT_CHAIN_ROUTE_IPV4
+ depends on NF_TABLES_IPV4
+ tristate "IPv4 nf_tables route chain support"
+
+config NFT_CHAIN_NAT_IPV4
+ depends on NF_TABLES_IPV4
+ depends on NF_NAT_IPV4 && NFT_NAT
+ tristate "IPv4 nf_tables nat chain support"
+
+config NF_TABLES_ARP
+ depends on NF_TABLES
+ tristate "ARP nf_tables support"
+
config IP_NF_IPTABLES
tristate "IP tables support (required for filtering/masq/NAT)"
default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3622b248b6dd..19df72b7ba88 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -27,6 +27,12 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
# NAT protocols (nf_nat)
obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
+obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
+obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
+obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
+obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
+obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
+
# generic IP tables
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index a865f6f94013..802ddecb30b8 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -27,13 +27,14 @@ static const struct xt_table packet_filter = {
/* The work comes in here from netfilter.c */
static unsigned int
-arptable_filter_hook(unsigned int hook, struct sk_buff *skb,
+arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net = dev_net((in != NULL) ? in : out);
- return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter);
+ return arpt_do_table(skb, ops->hooknum, in, out,
+ net->ipv4.arptable_filter);
}
static struct nf_hook_ops *arpfilter_ops __read_mostly;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 0b732efd32e2..a2e2b61cd7da 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -483,7 +483,7 @@ static void arp_print(struct arp_payload *payload)
#endif
static unsigned int
-arp_mangle(unsigned int hook,
+arp_mangle(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index b6346bf2fde3..01cffeaa0085 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -297,7 +297,7 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
+static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 50af5b45c050..e08a74a243a8 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -33,20 +33,21 @@ static const struct xt_table packet_filter = {
};
static unsigned int
-iptable_filter_hook(unsigned int hook, struct sk_buff *skb,
+iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net;
- if (hook == NF_INET_LOCAL_OUT &&
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
(skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr)))
/* root is playing with raw sockets. */
return NF_ACCEPT;
net = dev_net((in != NULL) ? in : out);
- return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter);
+ return ipt_do_table(skb, ops->hooknum, in, out,
+ net->ipv4.iptable_filter);
}
static struct nf_hook_ops *filter_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 0d8cd82e0fad..6a5079c34bb3 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -79,19 +79,19 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
/* The work comes in here from netfilter.c. */
static unsigned int
-iptable_mangle_hook(unsigned int hook,
+iptable_mangle_hook(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- if (hook == NF_INET_LOCAL_OUT)
+ if (ops->hooknum == NF_INET_LOCAL_OUT)
return ipt_mangle_out(skb, out);
- if (hook == NF_INET_POST_ROUTING)
- return ipt_do_table(skb, hook, in, out,
+ if (ops->hooknum == NF_INET_POST_ROUTING)
+ return ipt_do_table(skb, ops->hooknum, in, out,
dev_net(out)->ipv4.iptable_mangle);
/* PREROUTING/INPUT/FORWARD: */
- return ipt_do_table(skb, hook, in, out,
+ return ipt_do_table(skb, ops->hooknum, in, out,
dev_net(in)->ipv4.iptable_mangle);
}
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 683bfaffed65..ee2886126e3d 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -61,7 +61,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
}
static unsigned int
-nf_nat_ipv4_fn(unsigned int hooknum,
+nf_nat_ipv4_fn(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -71,7 +71,7 @@ nf_nat_ipv4_fn(unsigned int hooknum,
enum ip_conntrack_info ctinfo;
struct nf_conn_nat *nat;
/* maniptype == SRC for postrouting. */
- enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
/* We never see fragments: conntrack defrags on pre-routing
* and local-out, and nf_nat_out protects post-routing.
@@ -108,7 +108,7 @@ nf_nat_ipv4_fn(unsigned int hooknum,
case IP_CT_RELATED_REPLY:
if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
- hooknum))
+ ops->hooknum))
return NF_DROP;
else
return NF_ACCEPT;
@@ -121,14 +121,14 @@ nf_nat_ipv4_fn(unsigned int hooknum,
if (!nf_nat_initialized(ct, maniptype)) {
unsigned int ret;
- ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
+ ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
if (ret != NF_ACCEPT)
return ret;
} else {
pr_debug("Already setup manip %s for ct %p\n",
maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
ct);
- if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
goto oif_changed;
}
break;
@@ -137,11 +137,11 @@ nf_nat_ipv4_fn(unsigned int hooknum,
/* ESTABLISHED */
NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
ctinfo == IP_CT_ESTABLISHED_REPLY);
- if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
goto oif_changed;
}
- return nf_nat_packet(ct, ctinfo, hooknum, skb);
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
oif_changed:
nf_ct_kill_acct(ct, ctinfo, skb);
@@ -149,7 +149,7 @@ oif_changed:
}
static unsigned int
-nf_nat_ipv4_in(unsigned int hooknum,
+nf_nat_ipv4_in(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -158,7 +158,7 @@ nf_nat_ipv4_in(unsigned int hooknum,
unsigned int ret;
__be32 daddr = ip_hdr(skb)->daddr;
- ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
daddr != ip_hdr(skb)->daddr)
skb_dst_drop(skb);
@@ -167,7 +167,7 @@ nf_nat_ipv4_in(unsigned int hooknum,
}
static unsigned int
-nf_nat_ipv4_out(unsigned int hooknum,
+nf_nat_ipv4_out(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -185,7 +185,7 @@ nf_nat_ipv4_out(unsigned int hooknum,
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
@@ -207,7 +207,7 @@ nf_nat_ipv4_out(unsigned int hooknum,
}
static unsigned int
-nf_nat_ipv4_local_fn(unsigned int hooknum,
+nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -223,7 +223,7 @@ nf_nat_ipv4_local_fn(unsigned int hooknum,
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 1f82aea11df6..b2f7e8f98316 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -20,20 +20,20 @@ static const struct xt_table packet_raw = {
/* The work comes in here from netfilter.c. */
static unsigned int
-iptable_raw_hook(unsigned int hook, struct sk_buff *skb,
+iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net;
- if (hook == NF_INET_LOCAL_OUT &&
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
(skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr)))
/* root is playing with raw sockets. */
return NF_ACCEPT;
net = dev_net((in != NULL) ? in : out);
- return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw);
+ return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.iptable_raw);
}
static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index f867a8d38bf7..c86647ed2078 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -37,21 +37,22 @@ static const struct xt_table security_table = {
};
static unsigned int
-iptable_security_hook(unsigned int hook, struct sk_buff *skb,
+iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net;
- if (hook == NF_INET_LOCAL_OUT &&
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
(skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr)))
/* Somebody is playing with raw sockets. */
return NF_ACCEPT;
net = dev_net((in != NULL) ? in : out);
- return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security);
+ return ipt_do_table(skb, ops->hooknum, in, out,
+ net->ipv4.iptable_security);
}
static struct nf_hook_ops *sectbl_ops __read_mostly;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 86f5b34a4ed1..ecd8bec411c9 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -92,7 +92,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
return NF_ACCEPT;
}
-static unsigned int ipv4_helper(unsigned int hooknum,
+static unsigned int ipv4_helper(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -121,7 +121,7 @@ static unsigned int ipv4_helper(unsigned int hooknum,
ct, ctinfo);
}
-static unsigned int ipv4_confirm(unsigned int hooknum,
+static unsigned int ipv4_confirm(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -147,16 +147,16 @@ out:
return nf_conntrack_confirm(skb);
}
-static unsigned int ipv4_conntrack_in(unsigned int hooknum,
+static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb);
+ return nf_conntrack_in(dev_net(in), PF_INET, ops->hooknum, skb);
}
-static unsigned int ipv4_conntrack_local(unsigned int hooknum,
+static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -166,7 +166,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
if (skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb);
+ return nf_conntrack_in(dev_net(out), PF_INET, ops->hooknum, skb);
}
/* Connection tracking may drop packets, but never alters them, so
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 742815518b0f..12e13bd82b5b 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -60,7 +60,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
return IP_DEFRAG_CONNTRACK_OUT + zone;
}
-static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
+static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -83,7 +83,9 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
#endif
/* Gather fragments. */
if (ip_is_fragment(ip_hdr(skb))) {
- enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb);
+ enum ip_defrag_users user =
+ nf_ct_defrag_user(ops->hooknum, skb);
+
if (nf_ct_ipv4_gather_frags(skb, user))
return NF_STOLEN;
}
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
new file mode 100644
index 000000000000..3e67ef1c676f
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2008-2010 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netfilter_arp.h>
+#include <net/netfilter/nf_tables.h>
+
+/* Template ARP family descriptor; nf_tables_arp_init_net() copies it for each namespace. */
+static struct nft_af_info nft_af_arp __read_mostly = {
+ .family = NFPROTO_ARP,
+ .nhooks = NF_ARP_NUMHOOKS,
+ .owner = THIS_MODULE,
+};
+
+/*
+ * Per-namespace setup: allocate a private copy of the ARP family
+ * descriptor, store it in net->nft.arp and register it.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation or the registration
+ * fails (the copy is freed again in the latter case).
+ */
+static int nf_tables_arp_init_net(struct net *net)
+{
+	struct nft_af_info *afi;
+
+	afi = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+	net->nft.arp = afi;
+	if (!afi)
+		return -ENOMEM;
+
+	memcpy(afi, &nft_af_arp, sizeof(nft_af_arp));
+
+	if (nft_register_afinfo(net, afi) >= 0)
+		return 0;
+
+	kfree(afi);
+	return -ENOMEM;
+}
+
+/* Per-namespace teardown: unregister this namespace's ARP family descriptor, then free it. */
+static void nf_tables_arp_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.arp);
+ kfree(net->nft.arp);
+}
+
+/* Per-namespace init/exit callbacks; registered from nf_tables_arp_init(). */
+static struct pernet_operations nf_tables_arp_net_ops = {
+ .init = nf_tables_arp_init_net,
+ .exit = nf_tables_arp_exit_net,
+};
+
+/*
+ * Hook handler for ARP chains: fill a struct nft_pktinfo from the hook
+ * arguments and return the verdict of nft_do_chain_pktinfo().
+ * okfn is not used by this function.
+ */
+static unsigned int
+nft_do_chain_arp(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, ops, skb, in, out);
+
+ return nft_do_chain_pktinfo(&pkt, ops);
+}
+
+/* "filter" chain type for ARP: one handler serves the IN, OUT and FORWARD hooks. */
+static struct nf_chain_type filter_arp = {
+ .family = NFPROTO_ARP,
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ /* bit N set <=> hook N is supported; must match the .fn entries below */
+ .hook_mask = (1 << NF_ARP_IN) |
+ (1 << NF_ARP_OUT) |
+ (1 << NF_ARP_FORWARD),
+ .fn = {
+ [NF_ARP_IN] = nft_do_chain_arp,
+ [NF_ARP_OUT] = nft_do_chain_arp,
+ [NF_ARP_FORWARD] = nft_do_chain_arp,
+ },
+};
+
+/*
+ * Module init: register the ARP "filter" chain type, then the
+ * per-namespace operations.
+ *
+ * Returns 0 on success or a negative errno. A failed chain type
+ * registration is reported immediately instead of being ignored; if
+ * the pernet registration fails, the chain type is unregistered again.
+ */
+static int __init nf_tables_arp_init(void)
+{
+	int ret;
+
+	ret = nft_register_chain_type(&filter_arp);
+	if (ret < 0)
+		return ret;
+
+	ret = register_pernet_subsys(&nf_tables_arp_net_ops);
+	if (ret < 0)
+		nft_unregister_chain_type(&filter_arp);
+
+	return ret;
+}
+
+/* Module exit: undo nf_tables_arp_init() in reverse order. */
+static void __exit nf_tables_arp_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_arp_net_ops);
+ nft_unregister_chain_type(&filter_arp);
+}
+
+module_init(nf_tables_arp_init);
+module_exit(nf_tables_arp_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
new file mode 100644
index 000000000000..8f7536be1322
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/net_namespace.h>
+#include <net/ip.h>
+#include <net/net_namespace.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+
+/*
+ * IPv4 LOCAL_OUT handler.
+ *
+ * Packets shorter than an IPv4 header, or whose ihl field claims less
+ * than a minimal header, are accepted without running the chain (with
+ * a rate-limited log message). Everything else is evaluated through
+ * nft_do_chain_pktinfo() and its verdict is returned.
+ */
+static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops,
+				    struct sk_buff *skb,
+				    const struct net_device *in,
+				    const struct net_device *out,
+				    int (*okfn)(struct sk_buff *))
+{
+	struct nft_pktinfo pkt;
+	bool too_short;
+
+	too_short = skb->len < sizeof(struct iphdr) ||
+		    ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4;
+	if (unlikely(too_short)) {
+		if (net_ratelimit())
+			pr_info("nf_tables_ipv4: ignoring short SOCK_RAW packet\n");
+		return NF_ACCEPT;
+	}
+
+	nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+	return nft_do_chain_pktinfo(&pkt, ops);
+}
+
+/*
+ * Template IPv4 family descriptor; nf_tables_ipv4_init_net() copies it
+ * for each namespace. Only LOCAL_OUT has a family-level hook.
+ */
+static struct nft_af_info nft_af_ipv4 __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+ .hooks = {
+ [NF_INET_LOCAL_OUT] = nft_ipv4_output,
+ },
+};
+
+/*
+ * Per-namespace setup: allocate a private copy of the IPv4 family
+ * descriptor, store it in net->nft.ipv4 and register it.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation or the registration
+ * fails (the copy is freed again in the latter case).
+ */
+static int nf_tables_ipv4_init_net(struct net *net)
+{
+	struct nft_af_info *afi;
+
+	afi = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+	net->nft.ipv4 = afi;
+	if (!afi)
+		return -ENOMEM;
+
+	memcpy(afi, &nft_af_ipv4, sizeof(nft_af_ipv4));
+
+	if (nft_register_afinfo(net, afi) >= 0)
+		return 0;
+
+	kfree(afi);
+	return -ENOMEM;
+}
+
+/* Per-namespace teardown: unregister this namespace's IPv4 family descriptor, then free it. */
+static void nf_tables_ipv4_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.ipv4);
+ kfree(net->nft.ipv4);
+}
+
+/* Per-namespace init/exit callbacks; registered from nf_tables_ipv4_init(). */
+static struct pernet_operations nf_tables_ipv4_net_ops = {
+ .init = nf_tables_ipv4_init_net,
+ .exit = nf_tables_ipv4_exit_net,
+};
+
+/*
+ * Generic IPv4 hook handler: fill a struct nft_pktinfo via
+ * nft_set_pktinfo_ipv4() and return the verdict of
+ * nft_do_chain_pktinfo(). Unlike nft_ipv4_output() it performs no
+ * length check on the packet. okfn is not used by this function.
+ */
+static unsigned int
+nft_do_chain_ipv4(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+
+ return nft_do_chain_pktinfo(&pkt, ops);
+}
+
+/*
+ * "filter" chain type for IPv4, available on all five inet hooks.
+ * LOCAL_OUT goes through nft_ipv4_output() (which checks the packet
+ * length first); the other hooks use nft_do_chain_ipv4() directly.
+ */
+static struct nf_chain_type filter_ipv4 = {
+ .family = NFPROTO_IPV4,
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .hook_mask = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
+ .fn = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
+ [NF_INET_LOCAL_OUT] = nft_ipv4_output,
+ [NF_INET_FORWARD] = nft_do_chain_ipv4,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
+ },
+};
+
+/*
+ * Module init: register the IPv4 "filter" chain type, then the
+ * per-namespace operations.
+ *
+ * Returns 0 on success or a negative errno. A failed chain type
+ * registration is reported immediately; if the pernet registration
+ * fails, the chain type is unregistered again so that a failed module
+ * load does not leave a pointer to filter_ipv4 registered.
+ */
+static int __init nf_tables_ipv4_init(void)
+{
+	int ret;
+
+	ret = nft_register_chain_type(&filter_ipv4);
+	if (ret < 0)
+		return ret;
+
+	ret = register_pernet_subsys(&nf_tables_ipv4_net_ops);
+	if (ret < 0)
+		nft_unregister_chain_type(&filter_ipv4);
+
+	return ret;
+}
+
+/* Module exit: unregister the pernet operations, then the chain type. */
+static void __exit nf_tables_ipv4_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_ipv4_net_ops);
+ nft_unregister_chain_type(&filter_ipv4);
+}
+
+module_init(nf_tables_ipv4_init);
+module_exit(nf_tables_ipv4_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_INET);
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
new file mode 100644
index 000000000000..cf2c792cd971
--- /dev/null
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ * Copyright (c) 2012 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/ip.h>
+
+/*
+ * NAT chains
+ */
+
+/*
+ * Common NAT hook body for IPv4 nf_tables NAT chains.
+ *
+ * Packets without a conntrack entry (or with the untracked one) are
+ * accepted untouched. For connections whose NAT mapping for this hook's
+ * manip type is not set up yet, the chain is evaluated and, if it did
+ * not install a mapping, a null binding is allocated. Finally the
+ * packet is handed to nf_nat_packet().
+ *
+ * Returns a netfilter verdict. okfn is not used by this function.
+ */
+static unsigned int nf_nat_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn_nat *nat;
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+ struct nft_pktinfo pkt;
+ unsigned int ret;
+
+ if (ct == NULL || nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ /* Fragments are not expected at this point. */
+ NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
+
+ nat = nfct_nat(ct);
+ if (nat == NULL) {
+ /* Conntrack module was loaded late, can't add extension. */
+ if (nf_ct_is_confirmed(ct))
+ return NF_ACCEPT;
+ nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+ if (nat == NULL)
+ return NF_ACCEPT;
+ }
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED + IP_CT_IS_REPLY:
+ /* Related ICMP packets are handled here and never reach the chain. */
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+ if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+ ops->hooknum))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall through */
+ case IP_CT_NEW:
+ /* Mapping for this manip type already set up: skip the chain. */
+ if (nf_nat_initialized(ct, maniptype))
+ break;
+
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+
+ ret = nft_do_chain_pktinfo(&pkt, ops);
+ if (ret != NF_ACCEPT)
+ return ret;
+ /* The chain accepted without setting up a mapping. */
+ if (!nf_nat_initialized(ct, maniptype)) {
+ ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+ if (ret != NF_ACCEPT)
+ return ret;
+ }
+ /* Fall through */
+ default:
+ break;
+ }
+
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+}
+
+/*
+ * PRE_ROUTING NAT handler: run nf_nat_fn() and, if the packet survived
+ * and its destination address differs from the one seen on entry, drop
+ * the dst attached to the skb.
+ *
+ * Returns the verdict of nf_nat_fn() unchanged.
+ */
+static unsigned int nf_nat_prerouting(const struct nf_hook_ops *ops,
+				      struct sk_buff *skb,
+				      const struct net_device *in,
+				      const struct net_device *out,
+				      int (*okfn)(struct sk_buff *))
+{
+	const __be32 orig_daddr = ip_hdr(skb)->daddr;
+	unsigned int verdict = nf_nat_fn(ops, skb, in, out, okfn);
+
+	if (verdict == NF_DROP || verdict == NF_STOLEN)
+		return verdict;
+
+	if (ip_hdr(skb)->daddr != orig_daddr)
+		skb_dst_drop(skb);
+
+	return verdict;
+}
+
+/*
+ * POST_ROUTING NAT handler: run nf_nat_fn(); with CONFIG_XFRM, if the
+ * source address or the u.all part of this direction's tuple differs
+ * from the destination of the opposite direction's tuple, call
+ * nf_xfrm_me_harder() and drop the packet when that fails.
+ *
+ * Without CONFIG_XFRM this is a plain wrapper around nf_nat_fn(), hence
+ * the __maybe_unused locals.
+ */
+static unsigned int nf_nat_postrouting(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo __maybe_unused;
+ const struct nf_conn *ct __maybe_unused;
+ unsigned int ret;
+
+ ret = nf_nat_fn(ops, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+ /* Only look at the skb again if it was neither dropped nor stolen. */
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.src.u3.ip !=
+ ct->tuplehash[!dir].tuple.dst.u3.ip ||
+ ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all)
+ return nf_xfrm_me_harder(skb, AF_INET) == 0 ?
+ ret : NF_DROP;
+ }
+#endif
+ return ret;
+}
+
+/*
+ * LOCAL_OUT NAT handler: run nf_nat_fn(), then compare this direction's
+ * tuple destination with the opposite direction's tuple source.
+ * If the address differs the packet is re-routed with
+ * ip_route_me_harder(); otherwise, with CONFIG_XFRM, a difference in
+ * the u.all part triggers nf_xfrm_me_harder(). A failure of either
+ * helper turns the verdict into NF_DROP.
+ */
+static unsigned int nf_nat_output(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ unsigned int ret;
+
+ ret = nf_nat_fn(ops, skb, in, out, okfn);
+ /* Only look at the skb again if it was neither dropped nor stolen. */
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+ ct->tuplehash[!dir].tuple.src.u3.ip) {
+ if (ip_route_me_harder(skb, RTN_UNSPEC))
+ ret = NF_DROP;
+ }
+#ifdef CONFIG_XFRM
+ /* This else belongs to the address comparison above. */
+ else if (ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all)
+ if (nf_xfrm_me_harder(skb, AF_INET))
+ ret = NF_DROP;
+#endif
+ }
+ return ret;
+}
+
+/*
+ * "nat" chain type for IPv4. Each supported hook has its own wrapper
+ * around nf_nat_fn(), except LOCAL_IN which calls it directly.
+ */
+static struct nf_chain_type nft_chain_nat_ipv4 = {
+ .family = NFPROTO_IPV4,
+ .name = "nat",
+ .type = NFT_CHAIN_T_NAT,
+ .hook_mask = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .fn = {
+ [NF_INET_PRE_ROUTING] = nf_nat_prerouting,
+ [NF_INET_POST_ROUTING] = nf_nat_postrouting,
+ [NF_INET_LOCAL_OUT] = nf_nat_output,
+ [NF_INET_LOCAL_IN] = nf_nat_fn,
+ },
+ .me = THIS_MODULE,
+};
+
+/*
+ * Module init: register the IPv4 "nat" chain type.
+ * Returns 0 on success or the negative error from the registration.
+ */
+static int __init nft_chain_nat_init(void)
+{
+	int err = nft_register_chain_type(&nft_chain_nat_ipv4);
+
+	return err < 0 ? err : 0;
+}
+
+/* Module exit: unregister the IPv4 "nat" chain type. */
+static void __exit nft_chain_nat_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_nat_ipv4);
+}
+
+module_init(nft_chain_nat_init);
+module_exit(nft_chain_nat_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
new file mode 100644
index 000000000000..4e6bf9a3d7aa
--- /dev/null
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+/*
+ * LOCAL_OUT handler for the IPv4 "route" chain type.
+ *
+ * Remembers the packet mark, addresses and TOS, runs the chain, and
+ * re-routes the packet with ip_route_me_harder() if the rules changed
+ * any of them. Packets too short to carry an IPv4 header are accepted
+ * without running the chain.
+ *
+ * Returns the chain verdict, or NF_DROP if re-routing fails.
+ * okfn is not used by this function.
+ */
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+					struct sk_buff *skb,
+					const struct net_device *in,
+					const struct net_device *out,
+					int (*okfn)(struct sk_buff *))
+{
+	unsigned int ret;
+	struct nft_pktinfo pkt;
+	u32 mark;
+	__be32 saddr, daddr;
+	u_int8_t tos;
+	const struct iphdr *iph;
+
+	/* root is playing with raw sockets. */
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr))
+		return NF_ACCEPT;
+
+	nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+
+	/* Snapshot everything that influences the routing decision. */
+	mark = skb->mark;
+	iph = ip_hdr(skb);
+	saddr = iph->saddr;
+	daddr = iph->daddr;
+	tos = iph->tos;
+
+	ret = nft_do_chain_pktinfo(&pkt, ops);
+	/*
+	 * Do not touch the skb after NF_STOLEN: it no longer belongs to
+	 * this hook and may already be gone. NF_DROP and NF_QUEUE need no
+	 * re-routing either.
+	 */
+	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
+		/* Reload the header pointer; the chain ran on the skb. */
+		iph = ip_hdr(skb);
+
+		if (iph->saddr != saddr ||
+		    iph->daddr != daddr ||
+		    skb->mark != mark ||
+		    iph->tos != tos) {
+			if (ip_route_me_harder(skb, RTN_UNSPEC))
+				ret = NF_DROP;
+		}
+	}
+	return ret;
+}
+
+/* "route" chain type for IPv4; only the LOCAL_OUT hook is supported. */
+static struct nf_chain_type nft_chain_route_ipv4 = {
+ .family = NFPROTO_IPV4,
+ .name = "route",
+ .type = NFT_CHAIN_T_ROUTE,
+ .hook_mask = (1 << NF_INET_LOCAL_OUT),
+ .fn = {
+ [NF_INET_LOCAL_OUT] = nf_route_table_hook,
+ },
+ .me = THIS_MODULE,
+};
+
+/* Module init: register the IPv4 "route" chain type; returns the registration result. */
+static int __init nft_chain_route_init(void)
+{
+ return nft_register_chain_type(&nft_chain_route_ipv4);
+}
+
+/* Module exit: unregister the IPv4 "route" chain type. */
+static void __exit nft_chain_route_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_route_ipv4);
+}
+
+module_init(nft_chain_route_init);
+module_exit(nft_chain_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET, "route");
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
new file mode 100644
index 000000000000..fff5ba1a33b7
--- /dev/null
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/icmp.h>
+
+/* Private data of a "reject" expression. */
+struct nft_reject {
+ /* reject variant, stored in 8 bits */
+ enum nft_reject_types type:8;
+ /* ICMP code; set and used only for NFT_REJECT_ICMP_UNREACH */
+ u8 icmp_code;
+};
+
+/*
+ * Evaluate a "reject" expression: for NFT_REJECT_ICMP_UNREACH send an
+ * ICMP destination-unreachable with the configured code. In every case
+ * the verdict register is set to NF_DROP.
+ */
+static void nft_reject_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ icmp_send(pkt->skb, ICMP_DEST_UNREACH, priv->icmp_code, 0);
+ break;
+ case NFT_REJECT_TCP_RST:
+ /* NOTE(review): nothing is sent here; the packet is only dropped. */
+ break;
+ }
+
+ data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+
+/* Netlink attribute policy: reject type is a 32-bit value, ICMP code an 8-bit value. */
+static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
+ [NFTA_REJECT_TYPE] = { .type = NLA_U32 },
+ [NFTA_REJECT_ICMP_CODE] = { .type = NLA_U8 },
+};
+
+/*
+ * Parse the netlink attributes of a "reject" expression.
+ *
+ * NFTA_REJECT_TYPE is mandatory; NFTA_REJECT_ICMP_CODE is additionally
+ * required for NFT_REJECT_ICMP_UNREACH.
+ *
+ * Returns 0 on success, -EINVAL on a missing attribute or unknown type.
+ */
+static int nft_reject_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
+{
+	struct nft_reject *priv = nft_expr_priv(expr);
+	u32 type;
+
+	if (tb[NFTA_REJECT_TYPE] == NULL)
+		return -EINVAL;
+
+	/*
+	 * Validate the full 32-bit value before narrowing it into the
+	 * 8-bit priv->type field; otherwise e.g. 0x100 would be truncated
+	 * to 0 and wrongly accepted.
+	 */
+	type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
+	switch (type) {
+	case NFT_REJECT_ICMP_UNREACH:
+		if (tb[NFTA_REJECT_ICMP_CODE] == NULL)
+			return -EINVAL;
+		priv->icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]);
+		/* fall through */
+	case NFT_REJECT_TCP_RST:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	priv->type = type;
+	return 0;
+}
+
+/*
+ * Dump a "reject" expression to netlink: the type always, the ICMP code
+ * only for NFT_REJECT_ICMP_UNREACH.
+ *
+ * Returns 0 on success, -1 if the message has no room left.
+ */
+static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_reject *priv = nft_expr_priv(expr);
+
+	/*
+	 * priv->type is kept in host byte order (see the ntohl() in
+	 * nft_reject_init()), so convert it back for the be32 attribute.
+	 */
+	if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type)))
+		goto nla_put_failure;
+
+	switch (priv->type) {
+	case NFT_REJECT_ICMP_UNREACH:
+		if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
+			goto nla_put_failure;
+		break;
+	default:
+		/* no further attributes for the other types */
+		break;
+	}
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+/* Forward declaration: the ops and the type reference each other. */
+static struct nft_expr_type nft_reject_type;
+/* Operations of the "reject" expression. */
+static const struct nft_expr_ops nft_reject_ops = {
+ .type = &nft_reject_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+ .eval = nft_reject_eval,
+ .init = nft_reject_init,
+ .dump = nft_reject_dump,
+};
+
+/* Expression type descriptor registered under the name "reject". */
+static struct nft_expr_type nft_reject_type __read_mostly = {
+ .name = "reject",
+ .ops = &nft_reject_ops,
+ .policy = nft_reject_policy,
+ .maxattr = NFTA_REJECT_MAX,
+ .owner = THIS_MODULE,
+};
+
+/* Module init: register the "reject" expression type; returns the registration result. */
+static int __init nft_reject_module_init(void)
+{
+ return nft_register_expr(&nft_reject_type);
+}
+
+/* Module exit: unregister the "reject" expression type. */
+static void __exit nft_reject_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_type);
+}
+
+module_init(nft_reject_module_init);
+module_exit(nft_reject_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("reject");
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index a7f842b29b67..7702f9e90a04 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -25,6 +25,19 @@ config NF_CONNTRACK_IPV6
To compile it as a module, choose M here. If unsure, say N.
+config NF_TABLES_IPV6
+ depends on NF_TABLES
+ tristate "IPv6 nf_tables support"
+
+config NFT_CHAIN_ROUTE_IPV6
+ depends on NF_TABLES_IPV6
+ tristate "IPv6 nf_tables route chain support"
+
+config NFT_CHAIN_NAT_IPV6
+ depends on NF_TABLES_IPV6
+ depends on NF_NAT_IPV6 && NFT_NAT
+ tristate "IPv6 nf_tables nat chain support"
+
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
depends on INET && IPV6
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 2b53738f798c..d1b4928f34f7 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -23,6 +23,11 @@ obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
+# nf_tables
+obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
+obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
+obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
+
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 2748b042da72..bf9f612c1bc2 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -312,7 +312,7 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
+static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 29b44b14c5ea..ca7f6c128086 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -32,13 +32,14 @@ static const struct xt_table packet_filter = {
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_filter_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net = dev_net((in != NULL) ? in : out);
- return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter);
+ return ip6t_do_table(skb, ops->hooknum, in, out,
+ net->ipv6.ip6table_filter);
}
static struct nf_hook_ops *filter_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index c705907ae6ab..307bbb782d14 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -76,17 +76,17 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- if (hook == NF_INET_LOCAL_OUT)
+ if (ops->hooknum == NF_INET_LOCAL_OUT)
return ip6t_mangle_out(skb, out);
- if (hook == NF_INET_POST_ROUTING)
- return ip6t_do_table(skb, hook, in, out,
+ if (ops->hooknum == NF_INET_POST_ROUTING)
+ return ip6t_do_table(skb, ops->hooknum, in, out,
dev_net(out)->ipv6.ip6table_mangle);
/* INPUT/FORWARD */
- return ip6t_do_table(skb, hook, in, out,
+ return ip6t_do_table(skb, ops->hooknum, in, out,
dev_net(in)->ipv6.ip6table_mangle);
}
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 9b076d2d3a7b..84c7f33d0cf8 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -63,7 +63,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
}
static unsigned int
-nf_nat_ipv6_fn(unsigned int hooknum,
+nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -72,7 +72,7 @@ nf_nat_ipv6_fn(unsigned int hooknum,
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
struct nf_conn_nat *nat;
- enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
__be16 frag_off;
int hdrlen;
u8 nexthdr;
@@ -111,7 +111,8 @@ nf_nat_ipv6_fn(unsigned int hooknum,
if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
- hooknum, hdrlen))
+ ops->hooknum,
+ hdrlen))
return NF_DROP;
else
return NF_ACCEPT;
@@ -124,14 +125,14 @@ nf_nat_ipv6_fn(unsigned int hooknum,
if (!nf_nat_initialized(ct, maniptype)) {
unsigned int ret;
- ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
+ ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
if (ret != NF_ACCEPT)
return ret;
} else {
pr_debug("Already setup manip %s for ct %p\n",
maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
ct);
- if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
goto oif_changed;
}
break;
@@ -140,11 +141,11 @@ nf_nat_ipv6_fn(unsigned int hooknum,
/* ESTABLISHED */
NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
ctinfo == IP_CT_ESTABLISHED_REPLY);
- if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
goto oif_changed;
}
- return nf_nat_packet(ct, ctinfo, hooknum, skb);
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
oif_changed:
nf_ct_kill_acct(ct, ctinfo, skb);
@@ -152,7 +153,7 @@ oif_changed:
}
static unsigned int
-nf_nat_ipv6_in(unsigned int hooknum,
+nf_nat_ipv6_in(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -161,7 +162,7 @@ nf_nat_ipv6_in(unsigned int hooknum,
unsigned int ret;
struct in6_addr daddr = ipv6_hdr(skb)->daddr;
- ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
skb_dst_drop(skb);
@@ -170,7 +171,7 @@ nf_nat_ipv6_in(unsigned int hooknum,
}
static unsigned int
-nf_nat_ipv6_out(unsigned int hooknum,
+nf_nat_ipv6_out(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -187,7 +188,7 @@ nf_nat_ipv6_out(unsigned int hooknum,
if (skb->len < sizeof(struct ipv6hdr))
return NF_ACCEPT;
- ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
@@ -209,7 +210,7 @@ nf_nat_ipv6_out(unsigned int hooknum,
}
static unsigned int
-nf_nat_ipv6_local_fn(unsigned int hooknum,
+nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -224,7 +225,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum,
if (skb->len < sizeof(struct ipv6hdr))
return NF_ACCEPT;
- ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 9a626d86720f..5274740acecc 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -19,13 +19,14 @@ static const struct xt_table packet_raw = {
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_raw_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net = dev_net((in != NULL) ? in : out);
- return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw);
+ return ip6t_do_table(skb, ops->hooknum, in, out,
+ net->ipv6.ip6table_raw);
}
static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index ce88d1d7e525..ab3b0219ecfa 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -36,14 +36,15 @@ static const struct xt_table security_table = {
};
static unsigned int
-ip6table_security_hook(unsigned int hook, struct sk_buff *skb,
+ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net = dev_net((in != NULL) ? in : out);
- return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security);
+ return ip6t_do_table(skb, ops->hooknum, in, out,
+ net->ipv6.ip6table_security);
}
static struct nf_hook_ops *sectbl_ops __read_mostly;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 54b75ead5a69..486545eb42ce 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
return NF_ACCEPT;
}
-static unsigned int ipv6_helper(unsigned int hooknum,
+static unsigned int ipv6_helper(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -133,7 +133,7 @@ static unsigned int ipv6_helper(unsigned int hooknum,
return helper->help(skb, protoff, ct, ctinfo);
}
-static unsigned int ipv6_confirm(unsigned int hooknum,
+static unsigned int ipv6_confirm(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -219,16 +219,17 @@ static unsigned int __ipv6_conntrack_in(struct net *net,
return nf_conntrack_in(net, PF_INET6, hooknum, skb);
}
-static unsigned int ipv6_conntrack_in(unsigned int hooknum,
+static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn);
+ return __ipv6_conntrack_in(dev_net(in), ops->hooknum, skb, in, out,
+ okfn);
}
-static unsigned int ipv6_conntrack_local(unsigned int hooknum,
+static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -239,7 +240,8 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
return NF_ACCEPT;
}
- return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn);
+ return __ipv6_conntrack_in(dev_net(out), ops->hooknum, skb, in, out,
+ okfn);
}
static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index aacd121fe8c5..ec483aa3f60f 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -52,7 +52,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
}
-static unsigned int ipv6_defrag(unsigned int hooknum,
+static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -66,7 +66,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
return NF_ACCEPT;
#endif
- reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
+ reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb));
/* queued */
if (reasm == NULL)
return NF_STOLEN;
@@ -75,7 +75,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
if (reasm == skb)
return NF_ACCEPT;
- nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
+ nf_ct_frag6_output(ops->hooknum, reasm, (struct net_device *)in,
(struct net_device *)out, okfn);
return NF_STOLEN;
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
new file mode 100644
index 000000000000..d77db8a13505
--- /dev/null
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+
+/*
+ * LOCAL_OUT hook for IPv6: packets shorter than an IPv6 header are let
+ * through untouched (with a rate-limited notice); everything else is run
+ * through the chain, and dropped if the packet info cannot be set up.
+ */
+static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops,
+				    struct sk_buff *skb,
+				    const struct net_device *in,
+				    const struct net_device *out,
+				    int (*okfn)(struct sk_buff *))
+{
+	struct nft_pktinfo pktinfo;
+
+	if (likely(skb->len >= sizeof(struct ipv6hdr))) {
+		if (nft_set_pktinfo_ipv6(&pktinfo, ops, skb, in, out) < 0)
+			return NF_DROP;
+
+		return nft_do_chain_pktinfo(&pktinfo, ops);
+	}
+
+	/* Too short to even hold an IPv6 header: do not evaluate rules. */
+	if (net_ratelimit())
+		pr_info("nf_tables_ipv6: ignoring short SOCK_RAW packet\n");
+
+	return NF_ACCEPT;
+}
+
+static struct nft_af_info nft_af_ipv6 __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+ .hooks = {
+ [NF_INET_LOCAL_OUT] = nft_ipv6_output,
+ },
+};
+
+/*
+ * Per-netns init: give the namespace its own copy of the IPv6 address
+ * family template and register it with nf_tables.
+ *
+ * Returns 0 on success, -ENOMEM if the copy cannot be allocated, or the
+ * error reported by nft_register_afinfo().
+ */
+static int nf_tables_ipv6_init_net(struct net *net)
+{
+	struct nft_af_info *afi;
+	int err;
+
+	afi = kmalloc(sizeof(*afi), GFP_KERNEL);
+	if (afi == NULL)
+		return -ENOMEM;
+
+	memcpy(afi, &nft_af_ipv6, sizeof(*afi));
+	net->nft.ipv6 = afi;
+
+	err = nft_register_afinfo(net, afi);
+	if (err < 0) {
+		/* Do not leave a dangling pointer behind in the netns. */
+		net->nft.ipv6 = NULL;
+		kfree(afi);
+		return err;
+	}
+
+	return 0;
+}
+
+static void nf_tables_ipv6_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.ipv6);
+ kfree(net->nft.ipv6);
+}
+
+static struct pernet_operations nf_tables_ipv6_net_ops = {
+ .init = nf_tables_ipv6_init_net,
+ .exit = nf_tables_ipv6_exit_net,
+};
+
+/*
+ * Generic IPv6 filter hook: build the packet info and evaluate the chain.
+ * A packet for which the packet info cannot be set up is dropped.
+ */
+static unsigned int
+nft_do_chain_ipv6(const struct nf_hook_ops *ops,
+		  struct sk_buff *skb,
+		  const struct net_device *in,
+		  const struct net_device *out,
+		  int (*okfn)(struct sk_buff *))
+{
+	struct nft_pktinfo pktinfo;
+
+	if (nft_set_pktinfo_ipv6(&pktinfo, ops, skb, in, out) >= 0)
+		return nft_do_chain_pktinfo(&pktinfo, ops);
+
+	/* malformed packet, drop it */
+	return NF_DROP;
+}
+
+static struct nf_chain_type filter_ipv6 = {
+ .family = NFPROTO_IPV6,
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .hook_mask = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
+ .fn = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
+ [NF_INET_LOCAL_OUT] = nft_ipv6_output,
+ [NF_INET_FORWARD] = nft_do_chain_ipv6,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
+ },
+};
+
+/*
+ * Module init: register the IPv6 "filter" chain type, then the per-netns
+ * operations.  If the second step fails the chain type is unregistered
+ * again so that a failed load leaves nothing behind.
+ */
+static int __init nf_tables_ipv6_init(void)
+{
+	int err;
+
+	err = nft_register_chain_type(&filter_ipv6);
+	if (err < 0)
+		return err;
+
+	err = register_pernet_subsys(&nf_tables_ipv6_net_ops);
+	if (err < 0)
+		nft_unregister_chain_type(&filter_ipv6);
+
+	return err;
+}
+
+static void __exit nf_tables_ipv6_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_ipv6_net_ops);
+ nft_unregister_chain_type(&filter_ipv6);
+}
+
+module_init(nf_tables_ipv6_init);
+module_exit(nf_tables_ipv6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_INET6);
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
new file mode 100644
index 000000000000..e86dcd70dc76
--- /dev/null
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/ipv6.h>
+
+/*
+ * IPv6 NAT chains
+ */
+
+/*
+ * Common body of the IPv6 nat chain hooks.
+ *
+ * Packets without a conntrack entry, or with an untracked one, are
+ * accepted unchanged.  For RELATED ICMPv6 packets the reply translation
+ * helper decides the verdict.  For other RELATED packets and for NEW
+ * connections the nat chain is evaluated once per manip type; if the
+ * rules did not set up a mapping, a null binding is allocated.  Finally
+ * the packet itself is translated with nf_nat_packet().
+ */
+static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn_nat *nat;
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+ __be16 frag_off;
+ int hdrlen;
+ u8 nexthdr;
+ struct nft_pktinfo pkt;
+ unsigned int ret;
+
+ if (ct == NULL || nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ nat = nfct_nat(ct);
+ if (nat == NULL) {
+ /* Conntrack module was loaded late, can't add extension. */
+ if (nf_ct_is_confirmed(ct))
+ return NF_ACCEPT;
+ nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+ if (nat == NULL)
+ return NF_ACCEPT;
+ }
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED + IP_CT_IS_REPLY:
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+ &nexthdr, &frag_off);
+
+ /* RELATED ICMPv6: verdict comes from the reply translation. */
+ if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+ if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
+ ops->hooknum,
+ hdrlen))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall through */
+ case IP_CT_NEW:
+ /* Mapping for this manip type already set up: just translate. */
+ if (nf_nat_initialized(ct, maniptype))
+ break;
+
+ /* NOTE(review): the return value is ignored here, while the other
+ * callers visible in this patch drop the packet when it is
+ * negative - confirm that this is intended.
+ */
+ nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out);
+
+ ret = nft_do_chain_pktinfo(&pkt, ops);
+ if (ret != NF_ACCEPT)
+ return ret;
+ /* No rule set up a mapping: fall back to a null binding. */
+ if (!nf_nat_initialized(ct, maniptype)) {
+ ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+ if (ret != NF_ACCEPT)
+ return ret;
+ }
+ /* Fall through */
+ default:
+ break;
+ }
+
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+}
+
+/*
+ * PRE_ROUTING nat hook: run the common NAT code and, when the packet is
+ * neither dropped nor stolen and its destination address was rewritten,
+ * release the cached dst entry attached to the skb.
+ */
+static unsigned int nf_nat_ipv6_prerouting(const struct nf_hook_ops *ops,
+					   struct sk_buff *skb,
+					   const struct net_device *in,
+					   const struct net_device *out,
+					   int (*okfn)(struct sk_buff *))
+{
+	/* Snapshot taken before NAT may rewrite the header. */
+	struct in6_addr orig_daddr = ipv6_hdr(skb)->daddr;
+	unsigned int verdict;
+
+	verdict = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+	if (verdict == NF_DROP || verdict == NF_STOLEN)
+		return verdict;
+
+	if (ipv6_addr_cmp(&orig_daddr, &ipv6_hdr(skb)->daddr))
+		skb_dst_drop(skb);
+
+	return verdict;
+}
+
+/*
+ * POST_ROUTING nat hook: run the common NAT code.  With CONFIG_XFRM, a
+ * packet that was not already XFRM-transformed and whose source address
+ * or source protocol part differs from what the reply direction expects
+ * as destination is passed to nf_xfrm_me_harder(); a failure there turns
+ * the verdict into NF_DROP.
+ */
+static unsigned int nf_nat_ipv6_postrouting(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ /* Only used when CONFIG_XFRM is set, hence __maybe_unused. */
+ enum ip_conntrack_info ctinfo __maybe_unused;
+ const struct nf_conn *ct __maybe_unused;
+ unsigned int ret;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ /* Source address or source proto part changed by NAT. */
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3) ||
+ (ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all))
+ if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
+ ret = NF_DROP;
+ }
+#endif
+ return ret;
+}
+
+/*
+ * LOCAL_OUT nat hook: run the common NAT code.  If the destination
+ * address no longer matches the reply direction's source address the
+ * packet is re-routed with ip6_route_me_harder().  Otherwise, with
+ * CONFIG_XFRM, a change in the destination protocol part of a packet
+ * that was not already XFRM-transformed triggers nf_xfrm_me_harder().
+ * A failure of either helper turns the verdict into NF_DROP.
+ */
+static unsigned int nf_nat_ipv6_output(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ unsigned int ret;
+
+ ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ /* Destination address changed: the route must be recomputed. */
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
+ &ct->tuplehash[!dir].tuple.src.u3)) {
+ if (ip6_route_me_harder(skb))
+ ret = NF_DROP;
+ }
+#ifdef CONFIG_XFRM
+ /* Same address, but the destination proto part changed. */
+ else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all)
+ if (nf_xfrm_me_harder(skb, AF_INET6))
+ ret = NF_DROP;
+#endif
+ }
+ return ret;
+}
+
+static struct nf_chain_type nft_chain_nat_ipv6 = {
+ .family = NFPROTO_IPV6,
+ .name = "nat",
+ .type = NFT_CHAIN_T_NAT,
+ .hook_mask = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .fn = {
+ [NF_INET_PRE_ROUTING] = nf_nat_ipv6_prerouting,
+ [NF_INET_POST_ROUTING] = nf_nat_ipv6_postrouting,
+ [NF_INET_LOCAL_OUT] = nf_nat_ipv6_output,
+ [NF_INET_LOCAL_IN] = nf_nat_ipv6_fn,
+ },
+ .me = THIS_MODULE,
+};
+
+/* Module init: register the IPv6 "nat" chain type with nf_tables. */
+static int __init nft_chain_nat_ipv6_init(void)
+{
+	int rc = nft_register_chain_type(&nft_chain_nat_ipv6);
+
+	/* Only negative values are reported as errors. */
+	return rc < 0 ? rc : 0;
+}
+
+static void __exit nft_chain_nat_ipv6_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_nat_ipv6);
+}
+
+module_init(nft_chain_nat_ipv6_init);
+module_exit(nft_chain_nat_ipv6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat");
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
new file mode 100644
index 000000000000..3fe40f0456ad
--- /dev/null
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+#include <net/route.h>
+
+/*
+ * LOCAL_OUT hook of the IPv6 "route" chain type.
+ *
+ * Saves the header fields and the skb mark that can influence routing,
+ * evaluates the chain, and - unless the verdict is NF_DROP or NF_QUEUE -
+ * re-routes the packet with ip6_route_me_harder() if any of them was
+ * changed by the rules.  A re-routing failure yields NF_DROP.
+ */
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ unsigned int ret;
+ struct nft_pktinfo pkt;
+ struct in6_addr saddr, daddr;
+ u_int8_t hop_limit;
+ u32 mark, flowlabel;
+
+ /* malformed packet, drop it */
+ if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0)
+ return NF_DROP;
+
+ /* save source/dest address, mark, hoplimit, flowlabel, priority */
+ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+ memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
+ mark = skb->mark;
+ hop_limit = ipv6_hdr(skb)->hop_limit;
+
+ /* flowlabel and prio (includes version, which shouldn't change either) */
+ flowlabel = *((u32 *)ipv6_hdr(skb));
+
+ ret = nft_do_chain_pktinfo(&pkt, ops);
+ /* Re-route only if a saved field differs after rule evaluation. */
+ if (ret != NF_DROP && ret != NF_QUEUE &&
+ (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
+ memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
+ skb->mark != mark ||
+ ipv6_hdr(skb)->hop_limit != hop_limit ||
+ flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
+ return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
+
+ return ret;
+}
+
+static struct nf_chain_type nft_chain_route_ipv6 = {
+ .family = NFPROTO_IPV6,
+ .name = "route",
+ .type = NFT_CHAIN_T_ROUTE,
+ .hook_mask = (1 << NF_INET_LOCAL_OUT),
+ .fn = {
+ [NF_INET_LOCAL_OUT] = nf_route_table_hook,
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init nft_chain_route_init(void)
+{
+ return nft_register_chain_type(&nft_chain_route_ipv6);
+}
+
+static void __exit nft_chain_route_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_route_ipv6);
+}
+
+module_init(nft_chain_route_init);
+module_exit(nft_chain_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route");
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 6e839b6dff2b..48acec17e27a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -413,6 +413,58 @@ config NETFILTER_SYNPROXY
endif # NF_CONNTRACK
+config NF_TABLES
+ depends on NETFILTER_NETLINK
+ tristate "Netfilter nf_tables support"
+
+config NFT_EXTHDR
+ depends on NF_TABLES
+ tristate "Netfilter nf_tables IPv6 exthdr module"
+
+config NFT_META
+ depends on NF_TABLES
+ tristate "Netfilter nf_tables meta module"
+
+config NFT_CT
+ depends on NF_TABLES
+ depends on NF_CONNTRACK
+ tristate "Netfilter nf_tables conntrack module"
+
+config NFT_RBTREE
+ depends on NF_TABLES
+ tristate "Netfilter nf_tables rbtree set module"
+
+config NFT_HASH
+ depends on NF_TABLES
+ tristate "Netfilter nf_tables hash set module"
+
+config NFT_COUNTER
+ depends on NF_TABLES
+ tristate "Netfilter nf_tables counter module"
+
+config NFT_LOG
+ depends on NF_TABLES
+ tristate "Netfilter nf_tables log module"
+
+config NFT_LIMIT
+ depends on NF_TABLES
+ tristate "Netfilter nf_tables limit module"
+
+config NFT_NAT
+ depends on NF_TABLES
+ depends on NF_CONNTRACK
+ depends on NF_NAT
+ tristate "Netfilter nf_tables nat module"
+
+config NFT_COMPAT
+ depends on NF_TABLES
+ depends on NETFILTER_XTABLES
+ tristate "Netfilter x_tables over nf_tables module"
+ help
+ This is required if you intend to use any of existing
+ x_tables match/target extensions over the nf_tables
+ framework.
+
config NETFILTER_XTABLES
tristate "Netfilter Xtables support (required for ip_tables)"
default m if NETFILTER_ADVANCED=n
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index c3a0a12907f6..394483b2c193 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -64,6 +64,24 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
# SYNPROXY
obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
+# nf_tables
+nf_tables-objs += nf_tables_core.o nf_tables_api.o
+nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o
+nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
+
+obj-$(CONFIG_NF_TABLES) += nf_tables.o
+obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
+obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o
+obj-$(CONFIG_NFT_META) += nft_meta.o
+obj-$(CONFIG_NFT_CT) += nft_ct.o
+obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
+obj-$(CONFIG_NFT_NAT) += nft_nat.o
+#nf_tables-objs += nft_meta_target.o
+obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o
+obj-$(CONFIG_NFT_HASH) += nft_hash.o
+obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
+obj-$(CONFIG_NFT_LOG) += nft_log.o
+
# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 593b16ea45e0..1fbab0cdd302 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -146,7 +146,7 @@ unsigned int nf_iterate(struct list_head *head,
/* Optimization: we don't need to hold module
reference here, since function can't sleep. --RR */
repeat:
- verdict = (*elemp)->hook(hook, skb, indev, outdev, okfn);
+ verdict = (*elemp)->hook(*elemp, skb, indev, outdev, okfn);
if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
if (unlikely((verdict & NF_VERDICT_MASK)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 74fd00c27210..34fda62f40f6 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1239,11 +1239,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip_vs_out(hooknum, skb, AF_INET);
+ return ip_vs_out(ops->hooknum, skb, AF_INET);
}
/*
@@ -1251,11 +1251,11 @@ ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip_vs_out(hooknum, skb, AF_INET);
+ return ip_vs_out(ops->hooknum, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
@@ -1266,11 +1266,11 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip_vs_out(hooknum, skb, AF_INET6);
+ return ip_vs_out(ops->hooknum, skb, AF_INET6);
}
/*
@@ -1278,11 +1278,11 @@ ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip_vs_out(hooknum, skb, AF_INET6);
+ return ip_vs_out(ops->hooknum, skb, AF_INET6);
}
#endif
@@ -1733,12 +1733,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
* Schedule and forward packets from remote clients
*/
static unsigned int
-ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip_vs_in(hooknum, skb, AF_INET);
+ return ip_vs_in(ops->hooknum, skb, AF_INET);
}
/*
@@ -1746,11 +1746,11 @@ ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb,
* Schedule and forward packets from local clients
*/
static unsigned int
-ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip_vs_in(hooknum, skb, AF_INET);
+ return ip_vs_in(ops->hooknum, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
@@ -1760,7 +1760,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
* Copy info from first fragment, to the rest of them.
*/
static unsigned int
-ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_preroute_frag6(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -1792,12 +1792,12 @@ ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
* Schedule and forward packets from remote clients
*/
static unsigned int
-ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip_vs_in(hooknum, skb, AF_INET6);
+ return ip_vs_in(ops->hooknum, skb, AF_INET6);
}
/*
@@ -1805,11 +1805,11 @@ ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb,
* Schedule and forward packets from local clients
*/
static unsigned int
-ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip_vs_in(hooknum, skb, AF_INET6);
+ return ip_vs_in(ops->hooknum, skb, AF_INET6);
}
#endif
@@ -1825,7 +1825,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
* and send them to ip_vs_in_icmp.
*/
static unsigned int
-ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
@@ -1842,12 +1842,12 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
return NF_ACCEPT;
- return ip_vs_in_icmp(skb, &r, hooknum);
+ return ip_vs_in_icmp(skb, &r, ops->hooknum);
}
#ifdef CONFIG_IP_VS_IPV6
static unsigned int
-ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
+ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
@@ -1866,7 +1866,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
return NF_ACCEPT;
- return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr);
+ return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr);
}
#endif
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 6f0f4f7f68a5..63a815402211 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -432,6 +432,26 @@ nf_nat_setup_info(struct nf_conn *ct,
}
EXPORT_SYMBOL(nf_nat_setup_info);
+/*
+ * Set up a "null" NAT binding for @ct: the address range is pinned to the
+ * single address the connection already uses, leaving the per-protocol
+ * part to the protocol (hence no NF_NAT_RANGE_PROTO_SPECIFIED).  The reply
+ * tuple is used in case the original one was already mangled (e.g. for a
+ * local packet).
+ */
+unsigned int
+nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
+{
+	struct nf_nat_range range = {
+		.flags		= NF_NAT_RANGE_MAP_IPS,
+	};
+
+	if (HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC)
+		range.min_addr = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3;
+	else
+		range.min_addr = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3;
+	range.max_addr = range.min_addr;
+
+	return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
+}
+EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding);
+
/* Do packet manipulations according to nf_nat_setup_info. */
unsigned int nf_nat_packet(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
new file mode 100644
index 000000000000..dcddc49c0e08
--- /dev/null
+++ b/net/netfilter/nf_tables_api.c
@@ -0,0 +1,3275 @@
+/*
+ * Copyright (c) 2007-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+static LIST_HEAD(nf_tables_expressions);
+
+/**
+ * nft_register_afinfo - register nf_tables address family info
+ *
+ * @net: network namespace whose address family list @afi is added to
+ * @afi: address family info to register
+ *
+ * Register the address family for use with nf_tables: initialise its
+ * table list and append it to the namespace's list under the nfnetlink
+ * nf_tables subsystem lock. The current implementation always returns
+ * zero; callers should still treat a negative value as an errno code.
+ */
+int nft_register_afinfo(struct net *net, struct nft_af_info *afi)
+{
+ INIT_LIST_HEAD(&afi->tables);
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_add_tail(&afi->list, &net->nft.af_info);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_afinfo);
+
+/**
+ * nft_unregister_afinfo - unregister nf_tables address family info
+ *
+ * @afi: address family info to unregister
+ *
+ * Unregister the address family for use with nf_tables.
+ */
+void nft_unregister_afinfo(struct nft_af_info *afi)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_del(&afi->list);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_afinfo);
+
+/*
+ * Find the address family info registered for @family in @net.
+ * Returns NULL if the family is not registered in that namespace.
+ */
+static struct nft_af_info *nft_afinfo_lookup(struct net *net, int family)
+{
+	struct nft_af_info *cur;
+	struct nft_af_info *match = NULL;
+
+	list_for_each_entry(cur, &net->nft.af_info, list) {
+		if (cur->family != family)
+			continue;
+
+		match = cur;
+		break;
+	}
+
+	return match;
+}
+
+/*
+ * Look up the address family info for @family in @net, optionally trying
+ * to load the "nft-afinfo-<family>" module when it is not registered.
+ *
+ * Returns the afinfo if it was already registered, ERR_PTR(-EAGAIN) if it
+ * only became available after the module request, and
+ * ERR_PTR(-EAFNOSUPPORT) otherwise.
+ */
+static struct nft_af_info *
+nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
+{
+ struct nft_af_info *afi;
+
+ afi = nft_afinfo_lookup(net, family);
+ if (afi != NULL)
+ return afi;
+#ifdef CONFIG_MODULES
+ if (autoload) {
+ /* The subsystem lock is released around the module request. */
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ request_module("nft-afinfo-%u", family);
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ afi = nft_afinfo_lookup(net, family);
+ /* NOTE(review): -EAGAIN is returned even though the family is
+ * now present - presumably so that the caller replays the
+ * whole request because the lock was dropped; confirm against
+ * the nfnetlink dispatcher.
+ */
+ if (afi != NULL)
+ return ERR_PTR(-EAGAIN);
+ }
+#endif
+ return ERR_PTR(-EAFNOSUPPORT);
+}
+
+/*
+ * Tables
+ */
+
+/*
+ * Find the table of @afi whose name equals the string attribute @nla.
+ * Returns NULL when no table matches.
+ */
+static struct nft_table *nft_table_lookup(const struct nft_af_info *afi,
+					  const struct nlattr *nla)
+{
+	struct nft_table *cur;
+	struct nft_table *match = NULL;
+
+	list_for_each_entry(cur, &afi->tables, list) {
+		if (nla_strcmp(nla, cur->name) != 0)
+			continue;
+
+		match = cur;
+		break;
+	}
+
+	return match;
+}
+
+/*
+ * Error-pointer flavour of nft_table_lookup(): returns the table,
+ * ERR_PTR(-EINVAL) when the name attribute is missing, or
+ * ERR_PTR(-ENOENT) when no table of @afi carries that name.
+ */
+static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi,
+						const struct nlattr *nla)
+{
+	struct nft_table *found;
+
+	if (nla == NULL)
+		return ERR_PTR(-EINVAL);
+
+	found = nft_table_lookup(afi, nla);
+	if (found == NULL)
+		return ERR_PTR(-ENOENT);
+
+	return found;
+}
+
+static inline u64 nf_tables_alloc_handle(struct nft_table *table)
+{
+ return ++table->hgenerator;
+}
+
+static struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX];
+
+/*
+ * Return the index of the chain type registered for @family whose name
+ * equals the string attribute @nla, or -1 if there is none.
+ */
+static int __nf_tables_chain_type_lookup(int family, const struct nlattr *nla)
+{
+	int idx;
+
+	for (idx = 0; idx < NFT_CHAIN_T_MAX; idx++) {
+		const struct nf_chain_type *type = chain_type[family][idx];
+
+		if (type == NULL)
+			continue;
+		if (nla_strcmp(nla, type->name) == 0)
+			return idx;
+	}
+
+	return -1;
+}
+
+/*
+ * Look up a chain type by name for the family of @afi, optionally trying
+ * to load the "nft-chain-<family>-<name>" module when it is not yet
+ * registered.  The subsystem lock is released around the module request.
+ *
+ * Returns the chain type index, or a negative value if the type is
+ * (still) unknown.
+ */
+static int nf_tables_chain_type_lookup(const struct nft_af_info *afi,
+				       const struct nlattr *nla,
+				       bool autoload)
+{
+	int type;
+
+	type = __nf_tables_chain_type_lookup(afi->family, nla);
+#ifdef CONFIG_MODULES
+	if (type < 0 && autoload) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+		/*
+		 * "%.*s" bounds the read to the attribute payload; the
+		 * payload is not guaranteed to be NUL-terminated.  The
+		 * previous "%*.s" meant "width N, precision 0" and thus
+		 * never emitted the name at all.
+		 */
+		request_module("nft-chain-%u-%.*s", afi->family,
+			       nla_len(nla), (const char *)nla_data(nla));
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		type = __nf_tables_chain_type_lookup(afi->family, nla);
+	}
+#endif
+	return type;
+}
+
+static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
+ [NFTA_TABLE_NAME] = { .type = NLA_STRING },
+ [NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
+};
+
+/*
+ * Append one table message to @skb: a netlink header for @event in the
+ * nf_tables subsystem, an nfgenmsg carrying @family, and the table name
+ * and flags attributes.
+ *
+ * Returns the result of nlmsg_end() on success. On failure the partially
+ * written message is trimmed from @skb again and -1 is returned.
+ */
+static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
+ int event, u32 flags, int family,
+ const struct nft_table *table)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+
+ /* The subsystem id lives in the upper byte of the message type. */
+ event |= NFNL_SUBSYS_NFTABLES << 8;
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
+ nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)))
+ goto nla_put_failure;
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+/*
+ * Send a table @event notification to the NFNLGRP_NFTABLES group.
+ *
+ * @oskb and @nlh describe the request that triggered the event; either
+ * may be NULL, in which case portid/seq default to 0 and init_net is
+ * used. Nothing is sent when the request did not ask for a report and
+ * the group has no listeners.
+ *
+ * Returns 0 when nothing had to be sent, otherwise the result of
+ * nfnetlink_send(); on failure the error is also signalled to the group
+ * through nfnetlink_set_err().
+ */
+static int nf_tables_table_notify(const struct sk_buff *oskb,
+ const struct nlmsghdr *nlh,
+ const struct nft_table *table,
+ int event, int family)
+{
+ struct sk_buff *skb;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ u32 seq = nlh ? nlh->nlmsg_seq : 0;
+ struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+ bool report;
+ int err;
+
+ report = nlh ? nlmsg_report(nlh) : false;
+ if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ return 0;
+
+ err = -ENOBUFS;
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL)
+ goto err;
+
+ err = nf_tables_fill_table_info(skb, portid, seq, event, 0,
+ family, table);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto err;
+ }
+
+ err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+ GFP_KERNEL);
+err:
+ /* Reached on success too; only a negative err is reported. */
+ if (err < 0)
+ nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+ return err;
+}
+
+/*
+ * Netlink dump callback: emit one NFT_MSG_NEWTABLE message per table of
+ * every address family matching the requested one (NFPROTO_UNSPEC
+ * matches all). cb->args[0] holds the number of tables already dumped,
+ * so a dump that did not fit into one skb resumes where it stopped.
+ *
+ * Returns skb->len.
+ */
+static int nf_tables_dump_tables(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ const struct nft_af_info *afi;
+ const struct nft_table *table;
+ unsigned int idx = 0, s_idx = cb->args[0];
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+
+ list_for_each_entry(afi, &net->nft.af_info, list) {
+ if (family != NFPROTO_UNSPEC && family != afi->family)
+ continue;
+
+ list_for_each_entry(table, &afi->tables, list) {
+ /* Skip entries emitted by a previous invocation. */
+ if (idx < s_idx)
+ goto cont;
+ /* Past the resume point: clear the remaining callback state. */
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ /* A fill failure ends this pass; idx is saved as resume point. */
+ if (nf_tables_fill_table_info(skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWTABLE,
+ NLM_F_MULTI,
+ afi->family, table) < 0)
+ goto done;
+cont:
+ idx++;
+ }
+ }
+done:
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+/*
+ * Handler for table "get" requests. With NLM_F_DUMP a dump using
+ * nf_tables_dump_tables() is started. Otherwise the table named by
+ * NFTA_TABLE_NAME in the requested family is looked up (without module
+ * autoloading) and a single NFT_MSG_NEWTABLE message describing it is
+ * unicast back to the requester.
+ *
+ * Returns the lookup error, -ENOMEM if the reply cannot be allocated,
+ * the fill error, or the result of nlmsg_unicast().
+ */
+static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nft_af_info *afi;
+ const struct nft_table *table;
+ struct sk_buff *skb2;
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+ int err;
+
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = nf_tables_dump_tables,
+ };
+ return netlink_dump_start(nlsk, skb, nlh, &c);
+ }
+
+ afi = nf_tables_afinfo_lookup(net, family, false);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
+ family, table);
+ if (err < 0)
+ goto err;
+
+ return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+ kfree_skb(skb2);
+ return err;
+}
+
+/*
+ * Register the netfilter hooks of all base chains of @table.
+ *
+ * Only base chains carry hook ops; other chains are skipped, since
+ * nft_base_chain() must not be applied to them.  On failure, the hooks
+ * registered so far are unregistered again and the error is returned.
+ */
+static int nf_tables_table_enable(struct nft_table *table)
+{
+	struct nft_chain *chain;
+	int err, i = 0;
+
+	list_for_each_entry(chain, &table->chains, list) {
+		if (!(chain->flags & NFT_BASE_CHAIN))
+			continue;
+
+		err = nf_register_hook(&nft_base_chain(chain)->ops);
+		if (err < 0)
+			goto err;
+
+		i++;
+	}
+	return 0;
+err:
+	/* Undo exactly the first i base-chain registrations. */
+	list_for_each_entry(chain, &table->chains, list) {
+		if (!(chain->flags & NFT_BASE_CHAIN))
+			continue;
+
+		if (i-- <= 0)
+			break;
+
+		nf_unregister_hook(&nft_base_chain(chain)->ops);
+	}
+	return err;
+}
+
+/*
+ * Unregister the netfilter hooks of all base chains of @table.
+ *
+ * Only base chains carry hook ops; other chains are skipped, since
+ * nft_base_chain() must not be applied to them.  Always returns 0.
+ */
+static int nf_tables_table_disable(struct nft_table *table)
+{
+	struct nft_chain *chain;
+
+	list_for_each_entry(chain, &table->chains, list) {
+		if (!(chain->flags & NFT_BASE_CHAIN))
+			continue;
+
+		nf_unregister_hook(&nft_base_chain(chain)->ops);
+	}
+
+	return 0;
+}
+
+/*
+ * Update an existing table from a "new table" request.
+ *
+ * Only NFTA_TABLE_FLAGS is honoured: switching NFT_TABLE_F_DORMANT on
+ * disables the table's hooks, switching it off enables them again.  Any
+ * other flag bit yields -EINVAL.  A NFT_MSG_NEWTABLE notification is sent
+ * unless enabling/disabling failed.
+ *
+ * Returns 0 on success or a negative errno code.
+ */
+static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[],
+			      struct nft_af_info *afi, struct nft_table *table)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	int family = nfmsg->nfgen_family, ret = 0;
+
+	if (nla[NFTA_TABLE_FLAGS]) {
+		/* Host byte order after ntohl(), hence u32 and not __be32. */
+		u32 flags;
+
+		flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
+		if (flags & ~NFT_TABLE_F_DORMANT)
+			return -EINVAL;
+
+		if ((flags & NFT_TABLE_F_DORMANT) &&
+		    !(table->flags & NFT_TABLE_F_DORMANT)) {
+			ret = nf_tables_table_disable(table);
+			if (ret >= 0)
+				table->flags |= NFT_TABLE_F_DORMANT;
+		} else if (!(flags & NFT_TABLE_F_DORMANT) &&
+			   table->flags & NFT_TABLE_F_DORMANT) {
+			ret = nf_tables_table_enable(table);
+			if (ret >= 0)
+				table->flags &= ~NFT_TABLE_F_DORMANT;
+		}
+		if (ret < 0)
+			goto err;
+	}
+
+	nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+err:
+	return ret;
+}
+
+/*
+ * Handler for "new table" requests.
+ *
+ * If a table with the given name already exists in the family, the
+ * request is rejected with -EEXIST (NLM_F_EXCL) or -EOPNOTSUPP
+ * (NLM_F_REPLACE), or else treated as an update.  Otherwise a new table
+ * is allocated, linked into the family's table list and announced with a
+ * NFT_MSG_NEWTABLE notification.
+ *
+ * Returns 0 on success or a negative errno code.
+ */
+static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nlattr *name;
+	struct nft_af_info *afi;
+	struct nft_table *table;
+	struct net *net = sock_net(skb->sk);
+	int family = nfmsg->nfgen_family;
+	/* Host byte order after ntohl(), hence u32 and not __be32. */
+	u32 flags = 0;
+
+	afi = nf_tables_afinfo_lookup(net, family, true);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	name = nla[NFTA_TABLE_NAME];
+	table = nf_tables_table_lookup(afi, name);
+	if (IS_ERR(table)) {
+		if (PTR_ERR(table) != -ENOENT)
+			return PTR_ERR(table);
+		table = NULL;
+	}
+
+	if (table != NULL) {
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EOPNOTSUPP;
+		return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table);
+	}
+
+	/* Validate before allocating so there is nothing to undo. */
+	if (nla[NFTA_TABLE_FLAGS]) {
+		flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
+		if (flags & ~NFT_TABLE_F_DORMANT)
+			return -EINVAL;
+	}
+
+	/*
+	 * Reserve one extra byte for the terminator: nla_strlcpy() copies at
+	 * most dstsize - 1 characters, so sizing the buffer with nla_len()
+	 * alone would cut the last character off a name sent without a
+	 * trailing NUL.
+	 */
+	table = kzalloc(sizeof(*table) + nla_len(name) + 1, GFP_KERNEL);
+	if (table == NULL)
+		return -ENOMEM;
+
+	nla_strlcpy(table->name, name, nla_len(name) + 1);
+	INIT_LIST_HEAD(&table->chains);
+	INIT_LIST_HEAD(&table->sets);
+	table->flags |= flags;
+
+	list_add_tail(&table->list, &afi->tables);
+	nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
+	return 0;
+}
+
+/*
+ * Handle NFT_MSG_DELTABLE: unlink and free the named table.
+ *
+ * Fails with -EBUSY while the table's use counter is non-zero. A
+ * NFT_MSG_DELTABLE notification is sent before the table is freed.
+ * Returns 0 on success or the lookup error.
+ */
+static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *hdr = nlmsg_data(nlh);
+	struct net *net = sock_net(skb->sk);
+	int family = hdr->nfgen_family;
+	struct nft_af_info *afi;
+	struct nft_table *victim;
+
+	afi = nf_tables_afinfo_lookup(net, family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	victim = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
+	if (IS_ERR(victim))
+		return PTR_ERR(victim);
+
+	if (victim->use != 0)
+		return -EBUSY;
+
+	list_del(&victim->list);
+	nf_tables_table_notify(skb, nlh, victim, NFT_MSG_DELTABLE, family);
+	kfree(victim);
+
+	return 0;
+}
+
+/*
+ * Register @ctype in the chain_type[family][type] table.
+ *
+ * Runs under the nfnl lock for NFNL_SUBSYS_NFTABLES and takes a reference on
+ * ctype->me for as long as the type is registered.
+ *
+ * Returns 0 on success, -EBUSY if the slot is already occupied, or -ENOENT
+ * if the module reference could not be taken. The latter used to return 0
+ * although nothing had been registered.
+ */
+int nft_register_chain_type(struct nf_chain_type *ctype)
+{
+	int err = 0;
+
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	if (chain_type[ctype->family][ctype->type] != NULL) {
+		err = -EBUSY;
+		goto out;
+	}
+
+	if (!try_module_get(ctype->me)) {
+		/* Do not report success for a type that was not installed */
+		err = -ENOENT;
+		goto out;
+	}
+
+	chain_type[ctype->family][ctype->type] = ctype;
+out:
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+	return err;
+}
+EXPORT_SYMBOL_GPL(nft_register_chain_type);
+
+/*
+ * Clear the chain_type[family][type] slot of @ctype and drop a reference on
+ * ctype->me, both under the nfnl lock for NFNL_SUBSYS_NFTABLES.
+ *
+ * The slot is cleared unconditionally; it is not checked that it actually
+ * held @ctype.
+ */
+void nft_unregister_chain_type(struct nf_chain_type *ctype)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ chain_type[ctype->family][ctype->type] = NULL;
+ module_put(ctype->me);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_chain_type);
+
+/*
+ * Chains
+ */
+
+/*
+ * Find the chain of @table whose handle equals @handle.
+ * Returns the chain, or ERR_PTR(-ENOENT) if there is none.
+ */
+static struct nft_chain *
+nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle)
+{
+	struct nft_chain *cur;
+
+	list_for_each_entry(cur, &table->chains, list) {
+		if (cur->handle != handle)
+			continue;
+
+		return cur;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+/*
+ * Find the chain of @table whose name matches the string attribute @nla.
+ * Returns the chain, ERR_PTR(-EINVAL) when @nla is missing, or
+ * ERR_PTR(-ENOENT) when no chain carries that name.
+ */
+static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
+						const struct nlattr *nla)
+{
+	struct nft_chain *cur;
+
+	if (!nla)
+		return ERR_PTR(-EINVAL);
+
+	list_for_each_entry(cur, &table->chains, list) {
+		if (nla_strcmp(nla, cur->name) == 0)
+			return cur;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+/* Netlink attribute policy for the top-level NFTA_CHAIN_* attributes. */
+static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
+ [NFTA_CHAIN_TABLE] = { .type = NLA_STRING },
+ [NFTA_CHAIN_HANDLE] = { .type = NLA_U64 },
+ /* Bounded so that the name plus its NUL fits NFT_CHAIN_MAXNAMELEN bytes */
+ [NFTA_CHAIN_NAME] = { .type = NLA_STRING,
+ .len = NFT_CHAIN_MAXNAMELEN - 1 },
+ [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED },
+ [NFTA_CHAIN_POLICY] = { .type = NLA_U32 },
+ [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING },
+ [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED },
+};
+
+/* Policy for the attributes nested inside NFTA_CHAIN_HOOK. */
+static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
+ [NFTA_HOOK_HOOKNUM] = { .type = NLA_U32 },
+ [NFTA_HOOK_PRIORITY] = { .type = NLA_U32 },
+};
+
+/*
+ * Sum the per-cpu packet and byte counters in @stats over all possible CPUs
+ * and append them to @skb as a nested NFTA_CHAIN_COUNTERS attribute.
+ *
+ * Returns 0 on success or -ENOSPC if an attribute could not be added.
+ */
+static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats)
+{
+	struct nft_stats sum;
+	struct nlattr *counters;
+	int cpu;
+
+	memset(&sum, 0, sizeof(sum));
+	for_each_possible_cpu(cpu) {
+		struct nft_stats *percpu = per_cpu_ptr(stats, cpu);
+
+		sum.pkts += percpu->pkts;
+		sum.bytes += percpu->bytes;
+	}
+
+	counters = nla_nest_start(skb, NFTA_CHAIN_COUNTERS);
+	if (counters == NULL)
+		return -ENOSPC;
+
+	if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(sum.pkts)))
+		return -ENOSPC;
+	if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(sum.bytes)))
+		return -ENOSPC;
+
+	nla_nest_end(skb, counters);
+	return 0;
+}
+
+/*
+ * Append one netlink message describing @chain to @skb.
+ *
+ * The message type is @event within the NFNL_SUBSYS_NFTABLES subsystem. It
+ * carries the table name, chain handle and chain name; for base chains also
+ * the nested hook attributes (hook number, priority), the policy, the chain
+ * type name and the counters; and finally the chain's use counter.
+ *
+ * Returns the result of nlmsg_end() on success. On any failure the partially
+ * built message is trimmed from @skb and -1 is returned.
+ */
+static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
+ int event, u32 flags, int family,
+ const struct nft_table *table,
+ const struct nft_chain *chain)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+
+ /* The subsystem id lives in the upper byte of the message type */
+ event |= NFNL_SUBSYS_NFTABLES << 8;
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
+ goto nla_put_failure;
+ if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle)))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name))
+ goto nla_put_failure;
+
+ /* Hook, policy, type and counters exist for base chains only */
+ if (chain->flags & NFT_BASE_CHAIN) {
+ const struct nft_base_chain *basechain = nft_base_chain(chain);
+ const struct nf_hook_ops *ops = &basechain->ops;
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, NFTA_CHAIN_HOOK);
+ if (nest == NULL)
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+
+ if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
+ htonl(basechain->policy)))
+ goto nla_put_failure;
+
+ /* Type name is looked up by the hook's protocol family and the chain's type index */
+ if (nla_put_string(skb, NFTA_CHAIN_TYPE,
+ chain_type[ops->pf][nft_base_chain(chain)->type]->name))
+ goto nla_put_failure;
+
+ if (nft_dump_stats(skb, nft_base_chain(chain)->stats))
+ goto nla_put_failure;
+ }
+
+ if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use)))
+ goto nla_put_failure;
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ /* Drop whatever was added for this message, including open nests */
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+/*
+ * Send a chain event (@event) for @chain to the NFNLGRP_NFTABLES group.
+ *
+ * @oskb and @nlh may be NULL; port id, sequence number and the report flag
+ * then default to 0/false and the namespace to init_net. Nothing is built
+ * when no report was requested and the group has no listeners.
+ *
+ * Returns 0 when nothing had to be sent, otherwise the result of
+ * nfnetlink_send(); on failure the error is also recorded with
+ * nfnetlink_set_err().
+ */
+static int nf_tables_chain_notify(const struct sk_buff *oskb,
+				  const struct nlmsghdr *nlh,
+				  const struct nft_table *table,
+				  const struct nft_chain *chain,
+				  int event, int family)
+{
+	struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	bool report = nlh ? nlmsg_report(nlh) : false;
+	struct sk_buff *nskb;
+	int ret;
+
+	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+		return 0;
+
+	nskb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (nskb == NULL) {
+		ret = -ENOBUFS;
+		goto out;
+	}
+
+	ret = nf_tables_fill_chain_info(nskb, portid, seq, event, 0, family,
+					table, chain);
+	if (ret < 0) {
+		kfree_skb(nskb);
+		goto out;
+	}
+
+	ret = nfnetlink_send(nskb, net, portid, NFNLGRP_NFTABLES, report,
+			     GFP_KERNEL);
+out:
+	if (ret < 0)
+		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, ret);
+	return ret;
+}
+
+/*
+ * Netlink dump callback: emit one NFT_MSG_NEWCHAIN message per chain of the
+ * requested family (or of all families for NFPROTO_UNSPEC).
+ *
+ * cb->args[0] carries the running chain index between invocations: chains
+ * with an index below it were already dumped and are skipped. When a message
+ * does not fit into @skb the walk stops and the current index is stored so
+ * the next invocation resumes there. Returns skb->len.
+ */
+static int nf_tables_dump_chains(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ const struct nft_af_info *afi;
+ const struct nft_table *table;
+ const struct nft_chain *chain;
+ unsigned int idx = 0, s_idx = cb->args[0];
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+
+ list_for_each_entry(afi, &net->nft.af_info, list) {
+ if (family != NFPROTO_UNSPEC && family != afi->family)
+ continue;
+
+ list_for_each_entry(table, &afi->tables, list) {
+ list_for_each_entry(chain, &table->chains, list) {
+ /* Already dumped by an earlier invocation */
+ if (idx < s_idx)
+ goto cont;
+ /* Past the resume point: clear the remaining callback args */
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWCHAIN,
+ NLM_F_MULTI,
+ afi->family, table, chain) < 0)
+ goto done;
+cont:
+ idx++;
+ }
+ }
+ }
+done:
+ /* Remember where to resume */
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+
+/*
+ * Handle NFT_MSG_GETCHAIN.
+ *
+ * With NLM_F_DUMP a dump over all chains is started. Otherwise the chain
+ * named by NFTA_CHAIN_TABLE/NFTA_CHAIN_NAME is looked up and a single
+ * NFT_MSG_NEWCHAIN message describing it is unicast to the requester.
+ *
+ * Returns the result of the dump start or unicast, or a negative errno.
+ */
+static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *hdr = nlmsg_data(nlh);
+	struct net *net = sock_net(skb->sk);
+	int family = hdr->nfgen_family;
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+	struct sk_buff *reply;
+	int ret;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = nf_tables_dump_chains,
+		};
+
+		return netlink_dump_start(nlsk, skb, nlh, &c);
+	}
+
+	afi = nf_tables_afinfo_lookup(net, family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	reply = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (reply == NULL)
+		return -ENOMEM;
+
+	ret = nf_tables_fill_chain_info(reply, NETLINK_CB(skb).portid,
+					nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
+					family, table, chain);
+	if (ret < 0) {
+		kfree_skb(reply);
+		return ret;
+	}
+
+	return nlmsg_unicast(nlsk, reply, NETLINK_CB(skb).portid);
+}
+
+/*
+ * Set the policy of base chain @chain from the big-endian u32 in @attr.
+ * Only NF_DROP and NF_ACCEPT are accepted; anything else leaves the chain
+ * untouched and returns -EINVAL. Returns 0 on success.
+ */
+static int
+nf_tables_chain_policy(struct nft_base_chain *chain, const struct nlattr *attr)
+{
+	u32 verdict = ntohl(nla_get_be32(attr));
+
+	if (verdict != NF_DROP && verdict != NF_ACCEPT)
+		return -EINVAL;
+
+	chain->policy = verdict;
+	return 0;
+}
+
+/* Policy for the attributes nested inside NFTA_CHAIN_COUNTERS. */
+static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
+ [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 },
+ [NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
+};
+
+/*
+ * Install a fresh per-cpu counter set on base chain @chain, initialised from
+ * the nested counter attribute @attr.
+ *
+ * Both NFTA_COUNTER_BYTES and NFTA_COUNTER_PACKETS must be present
+ * (-EINVAL otherwise). Returns -ENOMEM if the per-cpu allocation fails, the
+ * parse error if @attr is malformed, and 0 on success.
+ */
+static int
+nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
+{
+ struct nlattr *tb[NFTA_COUNTER_MAX+1];
+ struct nft_stats __percpu *newstats;
+ struct nft_stats *stats;
+ int err;
+
+ err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
+ return -EINVAL;
+
+ newstats = alloc_percpu(struct nft_stats);
+ if (newstats == NULL)
+ return -ENOMEM;
+
+ /* Restore old counters on this cpu, no problem. Per-cpu statistics
+ * are not exposed to userspace.
+ */
+ stats = this_cpu_ptr(newstats);
+ stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+ stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+
+ if (chain->stats) {
+ /* nfnl_lock is held, add some nfnl function for this, later */
+ struct nft_stats __percpu *oldstats =
+ rcu_dereference_protected(chain->stats, 1);
+
+ /* Publish the new set, then wait for readers before freeing the old one */
+ rcu_assign_pointer(chain->stats, newstats);
+ synchronize_rcu();
+ free_percpu(oldstats);
+ } else
+ rcu_assign_pointer(chain->stats, newstats);
+
+ return 0;
+}
+
+/*
+ * Handle NFT_MSG_NEWCHAIN: create a chain in a table or update an existing
+ * one.
+ *
+ * Lookup: by NFTA_CHAIN_HANDLE if given (must exist), otherwise by
+ * NFTA_CHAIN_NAME (may be absent, which means "create").
+ *
+ * Existing chain: NLM_F_EXCL gives -EEXIST, NLM_F_REPLACE gives -EOPNOTSUPP.
+ * Policy and counters may be updated on base chains only (-EOPNOTSUPP
+ * otherwise). When both handle and name are given the chain is renamed,
+ * unless another chain already uses that name (-EEXIST).
+ *
+ * New chain: with NFTA_CHAIN_HOOK a base chain is built (hook ops, policy
+ * defaulting to NF_ACCEPT, counters) and, unless the table is dormant, its
+ * hook is registered; without it a regular chain is allocated. The chain is
+ * appended to the table and table->use is incremented.
+ *
+ * A NFT_MSG_NEWCHAIN notification is sent on success.
+ * Returns 0 on success or a negative errno.
+ */
+static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nlattr * uninitialized_var(name);
+	const struct nft_af_info *afi;
+	struct nft_table *table;
+	struct nft_chain *chain;
+	struct nft_base_chain *basechain = NULL;
+	struct nlattr *ha[NFTA_HOOK_MAX + 1];
+	struct net *net = sock_net(skb->sk);
+	int family = nfmsg->nfgen_family;
+	u64 handle = 0;
+	int err;
+	bool create;
+
+	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+	afi = nf_tables_afinfo_lookup(net, family, true);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	/* table->use is incremented for every chain that gets added */
+	if (table->use == UINT_MAX)
+		return -EOVERFLOW;
+
+	chain = NULL;
+	name = nla[NFTA_CHAIN_NAME];
+
+	if (nla[NFTA_CHAIN_HANDLE]) {
+		handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
+		chain = nf_tables_chain_lookup_byhandle(table, handle);
+		if (IS_ERR(chain))
+			return PTR_ERR(chain);
+	} else {
+		/* A missing name fails here with -EINVAL, so name is set below */
+		chain = nf_tables_chain_lookup(table, name);
+		if (IS_ERR(chain)) {
+			if (PTR_ERR(chain) != -ENOENT)
+				return PTR_ERR(chain);
+			chain = NULL;
+		}
+	}
+
+	if (chain != NULL) {
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EOPNOTSUPP;
+
+		/* Rename request: the new name must not be taken already */
+		if (nla[NFTA_CHAIN_HANDLE] && name &&
+		    !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME])))
+			return -EEXIST;
+
+		if (nla[NFTA_CHAIN_POLICY]) {
+			if (!(chain->flags & NFT_BASE_CHAIN))
+				return -EOPNOTSUPP;
+
+			err = nf_tables_chain_policy(nft_base_chain(chain),
+						     nla[NFTA_CHAIN_POLICY]);
+			if (err < 0)
+				return err;
+		}
+
+		if (nla[NFTA_CHAIN_COUNTERS]) {
+			if (!(chain->flags & NFT_BASE_CHAIN))
+				return -EOPNOTSUPP;
+
+			err = nf_tables_counters(nft_base_chain(chain),
+						 nla[NFTA_CHAIN_COUNTERS]);
+			if (err < 0)
+				return err;
+		}
+
+		if (nla[NFTA_CHAIN_HANDLE] && name)
+			nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+
+		goto notify;
+	}
+
+	if (nla[NFTA_CHAIN_HOOK]) {
+		struct nf_hook_ops *ops;
+		nf_hookfn *hookfn;
+		u32 hooknum;
+		int type = NFT_CHAIN_T_DEFAULT;
+
+		if (nla[NFTA_CHAIN_TYPE]) {
+			type = nf_tables_chain_type_lookup(afi,
+							   nla[NFTA_CHAIN_TYPE],
+							   create);
+			if (type < 0)
+				return -ENOENT;
+		}
+
+		err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
+				       nft_hook_policy);
+		if (err < 0)
+			return err;
+		if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
+		    ha[NFTA_HOOK_PRIORITY] == NULL)
+			return -EINVAL;
+
+		hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
+		if (hooknum >= afi->nhooks)
+			return -EINVAL;
+
+		/* The chain type must provide a handler for this hook */
+		hookfn = chain_type[family][type]->fn[hooknum];
+		if (hookfn == NULL)
+			return -EOPNOTSUPP;
+
+		basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
+		if (basechain == NULL)
+			return -ENOMEM;
+
+		basechain->type = type;
+		chain = &basechain->chain;
+
+		ops = &basechain->ops;
+		ops->pf = family;
+		ops->owner = afi->owner;
+		ops->hooknum = hooknum;
+		ops->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
+		ops->priv = chain;
+		ops->hook = hookfn;
+		/* A family-level hook, if present, overrides the type's one */
+		if (afi->hooks[ops->hooknum])
+			ops->hook = afi->hooks[ops->hooknum];
+
+		chain->flags |= NFT_BASE_CHAIN;
+
+		if (nla[NFTA_CHAIN_POLICY]) {
+			err = nf_tables_chain_policy(basechain,
+						     nla[NFTA_CHAIN_POLICY]);
+			if (err < 0) {
+				free_percpu(basechain->stats);
+				kfree(basechain);
+				return err;
+			}
+		} else
+			basechain->policy = NF_ACCEPT;
+
+		if (nla[NFTA_CHAIN_COUNTERS]) {
+			err = nf_tables_counters(basechain,
+						 nla[NFTA_CHAIN_COUNTERS]);
+			if (err < 0) {
+				free_percpu(basechain->stats);
+				kfree(basechain);
+				return err;
+			}
+		} else {
+			struct nft_stats __percpu *newstats;
+
+			newstats = alloc_percpu(struct nft_stats);
+			if (newstats == NULL) {
+				/* basechain is not linked anywhere yet */
+				kfree(basechain);
+				return -ENOMEM;
+			}
+
+			rcu_assign_pointer(nft_base_chain(chain)->stats,
+					   newstats);
+		}
+	} else {
+		chain = kzalloc(sizeof(*chain), GFP_KERNEL);
+		if (chain == NULL)
+			return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&chain->rules);
+	chain->handle = nf_tables_alloc_handle(table);
+	chain->net = net;
+	chain->table = table;
+	nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
+
+	/* Hooks of a dormant table are not registered */
+	if (!(table->flags & NFT_TABLE_F_DORMANT) &&
+	    chain->flags & NFT_BASE_CHAIN) {
+		err = nf_register_hook(&nft_base_chain(chain)->ops);
+		if (err < 0) {
+			free_percpu(basechain->stats);
+			kfree(basechain);
+			return err;
+		}
+	}
+	list_add_tail(&chain->list, &table->chains);
+	table->use++;
+notify:
+	nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN,
+			       family);
+	return 0;
+}
+
+/*
+ * RCU callback releasing a chain. A base chain is freed together with its
+ * per-cpu counters through its enclosing nft_base_chain; a regular chain is
+ * freed directly. The chain must no longer be in use.
+ */
+static void nf_tables_rcu_chain_destroy(struct rcu_head *head)
+{
+	struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head);
+	struct nft_base_chain *base;
+
+	BUG_ON(chain->use > 0);
+
+	if (!(chain->flags & NFT_BASE_CHAIN)) {
+		kfree(chain);
+		return;
+	}
+
+	base = nft_base_chain(chain);
+	free_percpu(base->stats);
+	kfree(base);
+}
+
+/*
+ * Handle NFT_MSG_DELCHAIN: remove the named chain from its table.
+ *
+ * The chain must be empty and unreferenced: -EBUSY is returned while it
+ * still holds rules or while its use counter is non-zero. The latter check
+ * matters because the deferred destructor asserts BUG_ON(chain->use > 0).
+ *
+ * On success the chain is unlinked, table->use is decremented, the hook of
+ * a base chain in a non-dormant table is unregistered, a NFT_MSG_DELCHAIN
+ * notification is sent and the memory is released after an RCU grace
+ * period. Returns 0 on success or a negative errno.
+ */
+static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
+			      const struct nlmsghdr *nlh,
+			      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_af_info *afi;
+	struct nft_table *table;
+	struct nft_chain *chain;
+	struct net *net = sock_net(skb->sk);
+	int family = nfmsg->nfgen_family;
+
+	afi = nf_tables_afinfo_lookup(net, family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	/* Refuse instead of tripping the BUG_ON() in the RCU destructor */
+	if (!list_empty(&chain->rules) || chain->use > 0)
+		return -EBUSY;
+
+	list_del(&chain->list);
+	table->use--;
+
+	if (!(table->flags & NFT_TABLE_F_DORMANT) &&
+	    chain->flags & NFT_BASE_CHAIN)
+		nf_unregister_hook(&nft_base_chain(chain)->ops);
+
+	nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN,
+			       family);
+
+	/* Make sure all rule references are gone before this is released */
+	call_rcu(&chain->rcu_head, nf_tables_rcu_chain_destroy);
+	return 0;
+}
+
+/*
+ * Fill in @ctx with the request being processed: the network namespace of
+ * the sending socket, the request skb and header, and the resolved family
+ * info, table, chain and attribute array. Only the fields assigned below are
+ * written; @skb must not be NULL since skb->sk is dereferenced.
+ */
+static void nft_ctx_init(struct nft_ctx *ctx,
+ const struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nft_af_info *afi,
+ const struct nft_table *table,
+ const struct nft_chain *chain,
+ const struct nlattr * const *nla)
+{
+ ctx->net = sock_net(skb->sk);
+ ctx->skb = skb;
+ ctx->nlh = nlh;
+ ctx->afi = afi;
+ ctx->table = table;
+ ctx->chain = chain;
+ ctx->nla = nla;
+}
+
+/*
+ * Expressions
+ */
+
+/**
+ * nft_register_expr - register nf_tables expr type
+ * @type: expr type
+ *
+ * Registers the expr type for use with nf_tables by appending it to the
+ * nf_tables_expressions list under the nfnl lock. Always returns zero.
+ */
+int nft_register_expr(struct nft_expr_type *type)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_add_tail(&type->list, &nf_tables_expressions);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_expr);
+
+/**
+ * nft_unregister_expr - unregister nf_tables expr type
+ * @type: expr type
+ *
+ * Unregisters the expr type for use with nf_tables by removing it from the
+ * list it was linked into, under the nfnl lock.
+ */
+void nft_unregister_expr(struct nft_expr_type *type)
+{
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ list_del(&type->list);
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_expr);
+
+/*
+ * Search the registered expression types for one whose name matches the
+ * string attribute @nla. Returns the type or NULL; takes no reference.
+ */
+static const struct nft_expr_type *__nft_expr_type_get(struct nlattr *nla)
+{
+	const struct nft_expr_type *cur;
+
+	list_for_each_entry(cur, &nf_tables_expressions, list) {
+		if (nla_strcmp(nla, cur->name) != 0)
+			continue;
+
+		return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Resolve the expression type named by @nla and take a reference on its
+ * owning module.
+ *
+ * Returns the type on success, ERR_PTR(-EINVAL) if @nla is missing, and
+ * ERR_PTR(-ENOENT) if the type is unknown or its module reference could not
+ * be taken. With CONFIG_MODULES an unknown type triggers a module load
+ * request; if the type shows up afterwards ERR_PTR(-EAGAIN) is returned
+ * without taking a reference (presumably so the caller replays the request
+ * - confirm against the caller).
+ */
+static const struct nft_expr_type *nft_expr_type_get(struct nlattr *nla)
+{
+ const struct nft_expr_type *type;
+
+ if (nla == NULL)
+ return ERR_PTR(-EINVAL);
+
+ type = __nft_expr_type_get(nla);
+ if (type != NULL && try_module_get(type->owner))
+ return type;
+
+#ifdef CONFIG_MODULES
+ if (type == NULL) {
+ /* The nfnl lock is dropped around request_module() and re-taken after */
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ request_module("nft-expr-%.*s",
+ nla_len(nla), (char *)nla_data(nla));
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ if (__nft_expr_type_get(nla))
+ return ERR_PTR(-EAGAIN);
+ }
+#endif
+ return ERR_PTR(-ENOENT);
+}
+
+/* Policy for one expression: its type name plus nested type-specific data. */
+static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = {
+ [NFTA_EXPR_NAME] = { .type = NLA_STRING },
+ [NFTA_EXPR_DATA] = { .type = NLA_NESTED },
+};
+
+/*
+ * Append the description of @expr to @skb: its type name and, if the
+ * expression provides a dump callback, a nested NFTA_EXPR_DATA attribute
+ * filled in by that callback.
+ *
+ * Returns skb->len on success and -1 on failure. On failure a started nest
+ * is not cancelled here; the caller is expected to trim the message.
+ */
+static int nf_tables_fill_expr_info(struct sk_buff *skb,
+				    const struct nft_expr *expr)
+{
+	if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->type->name))
+		goto nla_put_failure;
+
+	if (expr->ops->dump) {
+		struct nlattr *data = nla_nest_start(skb, NFTA_EXPR_DATA);
+		if (data == NULL)
+			goto nla_put_failure;
+		if (expr->ops->dump(skb, expr) < 0)
+			goto nla_put_failure;
+		nla_nest_end(skb, data);
+	}
+
+	return skb->len;
+
+nla_put_failure:
+	return -1;
+}
+
+/*
+ * Result of parsing one expression attribute: the selected ops and the
+ * parsed type-specific attributes.
+ *
+ * NOTE(review): nf_tables_expr_parse() fills tb[] up to type->maxattr;
+ * presumably every registered type has maxattr <= NFT_EXPR_MAXATTR - confirm.
+ */
+struct nft_expr_info {
+ const struct nft_expr_ops *ops;
+ struct nlattr *tb[NFT_EXPR_MAXATTR + 1];
+};
+
+/*
+ * Parse one expression attribute @nla into @info.
+ *
+ * Resolves the expression type by name (taking a module reference), parses
+ * the optional NFTA_EXPR_DATA payload with the type's own policy into
+ * info->tb, and picks the ops: via type->select_ops() if the type provides
+ * it, otherwise type->ops.
+ *
+ * Returns 0 on success, leaving the module reference held for the caller.
+ * On failure after the type was resolved the reference is dropped again and
+ * a negative errno is returned.
+ */
+static int nf_tables_expr_parse(const struct nft_ctx *ctx,
+ const struct nlattr *nla,
+ struct nft_expr_info *info)
+{
+ const struct nft_expr_type *type;
+ const struct nft_expr_ops *ops;
+ struct nlattr *tb[NFTA_EXPR_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy);
+ if (err < 0)
+ return err;
+
+ type = nft_expr_type_get(tb[NFTA_EXPR_NAME]);
+ if (IS_ERR(type))
+ return PTR_ERR(type);
+
+ if (tb[NFTA_EXPR_DATA]) {
+ err = nla_parse_nested(info->tb, type->maxattr,
+ tb[NFTA_EXPR_DATA], type->policy);
+ if (err < 0)
+ goto err1;
+ } else
+ /* No data: present an all-NULL attribute table to the type */
+ memset(info->tb, 0, sizeof(info->tb[0]) * (type->maxattr + 1));
+
+ if (type->select_ops != NULL) {
+ ops = type->select_ops(ctx,
+ (const struct nlattr * const *)info->tb);
+ if (IS_ERR(ops)) {
+ err = PTR_ERR(ops);
+ goto err1;
+ }
+ } else
+ ops = type->ops;
+
+ info->ops = ops;
+ return 0;
+
+err1:
+ /* Undo the reference taken by nft_expr_type_get() */
+ module_put(type->owner);
+ return err;
+}
+
+/*
+ * Bind @expr to the ops selected in @info and run the ops' init callback,
+ * if any, with the parsed attributes.
+ *
+ * Returns 0 on success. If init fails, expr->ops is reset to NULL so the
+ * expression reads as uninitialised, and the error is returned.
+ */
+static int nf_tables_newexpr(const struct nft_ctx *ctx,
+			     const struct nft_expr_info *info,
+			     struct nft_expr *expr)
+{
+	const struct nft_expr_ops *ops = info->ops;
+	int ret;
+
+	expr->ops = ops;
+	if (ops->init == NULL)
+		return 0;
+
+	ret = ops->init(ctx, expr, (const struct nlattr **)info->tb);
+	if (ret < 0) {
+		expr->ops = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Tear down an initialised expression: run its optional destroy callback
+ * and drop the module reference on the expression type's owner.
+ * expr->ops must be non-NULL.
+ */
+static void nf_tables_expr_destroy(struct nft_expr *expr)
+{
+ if (expr->ops->destroy)
+ expr->ops->destroy(expr);
+ module_put(expr->ops->type->owner);
+}
+
+/*
+ * Rules
+ */
+
+/*
+ * Find the rule of @chain with the given @handle by walking the rule list.
+ * Returns the rule or ERR_PTR(-ENOENT).
+ */
+static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain,
+						u64 handle)
+{
+	struct nft_rule *cur;
+
+	// FIXME: this sucks
+	list_for_each_entry(cur, &chain->rules, list) {
+		if (cur->handle != handle)
+			continue;
+
+		return cur;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+/*
+ * Find the rule of @chain whose handle is carried (big-endian u64) in @nla.
+ * Returns the rule, ERR_PTR(-EINVAL) when @nla is missing, or
+ * ERR_PTR(-ENOENT) when no rule has that handle.
+ */
+static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain,
+					      const struct nlattr *nla)
+{
+	u64 handle;
+
+	if (!nla)
+		return ERR_PTR(-EINVAL);
+
+	handle = be64_to_cpu(nla_get_be64(nla));
+	return __nf_tables_rule_lookup(chain, handle);
+}
+
+/* Netlink attribute policy for the top-level NFTA_RULE_* attributes. */
+static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
+ [NFTA_RULE_TABLE] = { .type = NLA_STRING },
+ [NFTA_RULE_CHAIN] = { .type = NLA_STRING,
+ .len = NFT_CHAIN_MAXNAMELEN - 1 },
+ [NFTA_RULE_HANDLE] = { .type = NLA_U64 },
+ [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED },
+ [NFTA_RULE_COMPAT] = { .type = NLA_NESTED },
+ /* Handle of an existing rule relative to which a new rule is placed */
+ [NFTA_RULE_POSITION] = { .type = NLA_U64 },
+};
+
+/*
+ * Append one netlink message describing @rule to @skb.
+ *
+ * The message carries the table name, chain name and rule handle. Unless
+ * the event is NFT_MSG_DELRULE or the rule is first in its chain, it also
+ * carries NFTA_RULE_POSITION with the handle of the preceding rule. The
+ * rule's expressions follow as a nested list of NFTA_LIST_ELEM entries.
+ *
+ * Returns the result of nlmsg_end() on success. On any failure the partially
+ * built message is trimmed from @skb and -1 is returned.
+ */
+static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
+ int event, u32 flags, int family,
+ const struct nft_table *table,
+ const struct nft_chain *chain,
+ const struct nft_rule *rule)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+ const struct nft_expr *expr, *next;
+ struct nlattr *list;
+ const struct nft_rule *prule;
+ /* The subsystem id lives in the upper byte of the message type */
+ int type = event | NFNL_SUBSYS_NFTABLES << 8;
+
+ nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg),
+ flags);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name))
+ goto nla_put_failure;
+ if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle)))
+ goto nla_put_failure;
+
+ /* prev == list head means this rule is the first one: no position */
+ if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) {
+ prule = list_entry(rule->list.prev, struct nft_rule, list);
+ if (nla_put_be64(skb, NFTA_RULE_POSITION,
+ cpu_to_be64(prule->handle)))
+ goto nla_put_failure;
+ }
+
+ list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS);
+ if (list == NULL)
+ goto nla_put_failure;
+ nft_rule_for_each_expr(expr, next, rule) {
+ struct nlattr *elem = nla_nest_start(skb, NFTA_LIST_ELEM);
+ if (elem == NULL)
+ goto nla_put_failure;
+ if (nf_tables_fill_expr_info(skb, expr) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, elem);
+ }
+ nla_nest_end(skb, list);
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ /* Drop whatever was added for this message, including open nests */
+ nlmsg_trim(skb, nlh);
+ return -1;
+}
+
+/*
+ * Send a rule event (@event) for @rule to the NFNLGRP_NFTABLES group.
+ *
+ * @oskb and @nlh are treated as optional, matching nf_tables_chain_notify():
+ * when absent, port id, sequence number and the report flag default to
+ * 0/false and the namespace to init_net. Previously @oskb was dereferenced
+ * for the port id before the NULL check used for the namespace, and @nlh
+ * was dereferenced unconditionally.
+ *
+ * Nothing is built when no report was requested and the group has no
+ * listeners. Returns 0 in that case, otherwise the result of
+ * nfnetlink_send(); on failure the error is also recorded with
+ * nfnetlink_set_err().
+ */
+static int nf_tables_rule_notify(const struct sk_buff *oskb,
+				 const struct nlmsghdr *nlh,
+				 const struct nft_table *table,
+				 const struct nft_chain *chain,
+				 const struct nft_rule *rule,
+				 int event, u32 flags, int family)
+{
+	struct sk_buff *skb;
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+	struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
+	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	bool report;
+	int err;
+
+	report = nlh ? nlmsg_report(nlh) : false;
+	if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+		return 0;
+
+	err = -ENOBUFS;
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb == NULL)
+		goto err;
+
+	err = nf_tables_fill_rule_info(skb, portid, seq, event, flags,
+				       family, table, chain, rule);
+	if (err < 0) {
+		kfree_skb(skb);
+		goto err;
+	}
+
+	err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
+			     GFP_KERNEL);
+err:
+	if (err < 0)
+		nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
+	return err;
+}
+
+/* True if the current generation's bit is clear in the rule's genmask. */
+static inline bool
+nft_rule_is_active(struct net *net, const struct nft_rule *rule)
+{
+	return !(rule->genmask & (1 << net->nft.gencursor));
+}
+
+/* Index of the next generation: 1 if the cursor is 0, otherwise 0. */
+static inline int gencursor_next(struct net *net)
+{
+	if (net->nft.gencursor == 0)
+		return 1;
+
+	return 0;
+}
+
+/* Non-zero if the next generation's bit is clear in the rule's genmask. */
+static inline int
+nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
+{
+	int next_bit = 1 << gencursor_next(net);
+
+	return !(rule->genmask & next_bit);
+}
+
+/*
+ * Mark @rule inactive in the current generation only: the genmask is set to
+ * exactly the current generation's bit, so the next generation's bit is clear.
+ */
+static inline void
+nft_rule_activate_next(struct net *net, struct nft_rule *rule)
+{
+ /* Now inactive, will be active in the future */
+ rule->genmask = (1 << net->nft.gencursor);
+}
+
+/*
+ * Mark @rule inactive in the next generation only: the genmask is set to
+ * exactly the next generation's bit, so the current generation's bit is clear.
+ */
+static inline void
+nft_rule_disactivate_next(struct net *net, struct nft_rule *rule)
+{
+ rule->genmask = (1 << gencursor_next(net));
+}
+
+/* Clear the genmask so @rule is active in both generations; @net is unused. */
+static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
+{
+ rule->genmask = 0;
+}
+
+/*
+ * Netlink dump callback: emit one NFT_MSG_NEWRULE message per rule that is
+ * active in the current generation, for the requested family (or for all
+ * families with NFPROTO_UNSPEC).
+ *
+ * cb->args[0] carries the running rule index between invocations; rules with
+ * a lower index were already dumped. Inactive rules are skipped but still
+ * counted. The generation cursor and counter are sampled on entry; if either
+ * changed by the time the walk ends, the dump is aborted with -EBUSY.
+ * Otherwise the index is stored for resumption and skb->len is returned.
+ */
+static int nf_tables_dump_rules(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ const struct nft_af_info *afi;
+ const struct nft_table *table;
+ const struct nft_chain *chain;
+ const struct nft_rule *rule;
+ unsigned int idx = 0, s_idx = cb->args[0];
+ struct net *net = sock_net(skb->sk);
+ int family = nfmsg->nfgen_family;
+ /* Snapshot of the generation state, compared again after the walk */
+ u8 genctr = ACCESS_ONCE(net->nft.genctr);
+ u8 gencursor = ACCESS_ONCE(net->nft.gencursor);
+
+ list_for_each_entry(afi, &net->nft.af_info, list) {
+ if (family != NFPROTO_UNSPEC && family != afi->family)
+ continue;
+
+ list_for_each_entry(table, &afi->tables, list) {
+ list_for_each_entry(chain, &table->chains, list) {
+ list_for_each_entry(rule, &chain->rules, list) {
+ if (!nft_rule_is_active(net, rule))
+ goto cont;
+ /* Already dumped by an earlier invocation */
+ if (idx < s_idx)
+ goto cont;
+ /* Past the resume point: clear the remaining callback args */
+ if (idx > s_idx)
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWRULE,
+ NLM_F_MULTI | NLM_F_APPEND,
+ afi->family, table, chain, rule) < 0)
+ goto done;
+cont:
+ idx++;
+ }
+ }
+ }
+ }
+done:
+ /* Invalidate this dump, a transition to the new generation happened */
+ if (gencursor != net->nft.gencursor || genctr != net->nft.genctr)
+ return -EBUSY;
+
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+/*
+ * Handle NFT_MSG_GETRULE.
+ *
+ * With NLM_F_DUMP a dump over all rules is started. Otherwise the rule
+ * identified by table, chain and handle is looked up and a single
+ * NFT_MSG_NEWRULE message describing it is unicast to the requester.
+ *
+ * Returns the result of the dump start or unicast, or a negative errno.
+ */
+static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
+			     const struct nlmsghdr *nlh,
+			     const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *hdr = nlmsg_data(nlh);
+	struct net *net = sock_net(skb->sk);
+	int family = hdr->nfgen_family;
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+	const struct nft_rule *rule;
+	struct sk_buff *reply;
+	int ret;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = nf_tables_dump_rules,
+		};
+
+		return netlink_dump_start(nlsk, skb, nlh, &c);
+	}
+
+	afi = nf_tables_afinfo_lookup(net, family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+	if (IS_ERR(rule))
+		return PTR_ERR(rule);
+
+	reply = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (reply == NULL)
+		return -ENOMEM;
+
+	ret = nf_tables_fill_rule_info(reply, NETLINK_CB(skb).portid,
+				       nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
+				       family, table, chain, rule);
+	if (ret < 0) {
+		kfree_skb(reply);
+		return ret;
+	}
+
+	return nlmsg_unicast(nlsk, reply, NETLINK_CB(skb).portid);
+}
+
+/*
+ * RCU callback releasing a rule: destroy its initialised expressions, then
+ * free the rule itself.
+ */
+static void nf_tables_rcu_rule_destroy(struct rcu_head *head)
+{
+	struct nft_rule *rule = container_of(head, struct nft_rule, rcu_head);
+	struct nft_expr *expr;
+
+	/*
+	 * Careful: some expressions might not be initialized in case this
+	 * is called on error from nf_tables_newrule().
+	 *
+	 * The end-of-rule test has to come first: once expr reaches
+	 * nft_expr_last() it no longer refers to an expression of this
+	 * rule (presumably it points just past the expression area), so
+	 * expr->ops must not be read there. That situation occurs whenever
+	 * all expressions are initialised or the rule has none.
+	 */
+	expr = nft_expr_first(rule);
+	while (expr != nft_expr_last(rule) && expr->ops) {
+		nf_tables_expr_destroy(expr);
+		expr = nft_expr_next(expr);
+	}
+	kfree(rule);
+}
+
+/* Queue @rule for destruction by nf_tables_rcu_rule_destroy() via call_rcu(). */
+static void nf_tables_rule_destroy(struct nft_rule *rule)
+{
+ call_rcu(&rule->rcu_head, nf_tables_rcu_rule_destroy);
+}
+
+/* Maximum number of expressions nf_tables_newrule() accepts for one rule. */
+#define NFT_RULE_MAXEXPRS 128
+
+/*
+ * Scratch array holding the parsed expressions of the rule currently being
+ * created; nf_tables_newrule() indexes it up to NFT_RULE_MAXEXPRS - 1.
+ * NOTE(review): allocated outside this section, presumably with
+ * NFT_RULE_MAXEXPRS entries - confirm.
+ */
+static struct nft_expr_info *info;
+
+/*
+ * Record a pending change to @rule: allocate a transaction entry describing
+ * the rule and its context (chain, table, family, request header) and append
+ * it to the namespace's commit list.
+ *
+ * Returns the new entry, or NULL if the allocation failed.
+ */
+static struct nft_rule_trans *
+nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx)
+{
+	struct nft_rule_trans *trans;
+
+	trans = kmalloc(sizeof(*trans), GFP_KERNEL);
+	if (!trans)
+		return NULL;
+
+	trans->rule = rule;
+	trans->nlh = ctx->nlh;
+	trans->table = ctx->table;
+	trans->chain = ctx->chain;
+	trans->family = ctx->afi->family;
+
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+	return trans;
+}
+
+/*
+ * Handle NFT_MSG_NEWRULE: add a rule to a chain or replace an existing one.
+ *
+ * With NFTA_RULE_HANDLE the handle must name an existing rule and
+ * NLM_F_REPLACE must be set (-EEXIST for NLM_F_EXCL, -EOPNOTSUPP without
+ * NLM_F_REPLACE). Without a handle NLM_F_CREATE is required and
+ * NLM_F_REPLACE is rejected (-EINVAL); a fresh handle is allocated.
+ * NFTA_RULE_POSITION (requires NLM_F_CREATE) names an existing rule
+ * relative to which the new rule is placed.
+ *
+ * The expressions are parsed into the file-scope info[] array first, then
+ * the rule is allocated with room for all of them and each expression is
+ * initialised in place. The new rule is marked inactive in the current
+ * generation, linked into the chain and recorded on the commit list.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+ const struct nft_af_info *afi;
+ struct net *net = sock_net(skb->sk);
+ struct nft_table *table;
+ struct nft_chain *chain;
+ struct nft_rule *rule, *old_rule = NULL;
+ struct nft_rule_trans *repl = NULL;
+ struct nft_expr *expr;
+ struct nft_ctx ctx;
+ struct nlattr *tmp;
+ unsigned int size, i, n;
+ int err, rem;
+ bool create;
+ u64 handle, pos_handle;
+
+ create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+ afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
+ if (IS_ERR(afi))
+ return PTR_ERR(afi);
+
+ table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+ if (IS_ERR(table))
+ return PTR_ERR(table);
+
+ chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+ if (IS_ERR(chain))
+ return PTR_ERR(chain);
+
+ if (nla[NFTA_RULE_HANDLE]) {
+ /* An explicit handle must name an existing rule; only replace is supported */
+ handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
+ rule = __nf_tables_rule_lookup(chain, handle);
+ if (IS_ERR(rule))
+ return PTR_ERR(rule);
+
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+ if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ old_rule = rule;
+ else
+ return -EOPNOTSUPP;
+ } else {
+ if (!create || nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EINVAL;
+ handle = nf_tables_alloc_handle(table);
+ }
+
+ if (nla[NFTA_RULE_POSITION]) {
+ if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+ return -EOPNOTSUPP;
+
+ /* old_rule doubles as the anchor the new rule is placed next to */
+ pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
+ old_rule = __nf_tables_rule_lookup(chain, pos_handle);
+ if (IS_ERR(old_rule))
+ return PTR_ERR(old_rule);
+ }
+
+ nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+
+ /* First pass: parse every expression and add up the space they need */
+ n = 0;
+ size = 0;
+ if (nla[NFTA_RULE_EXPRESSIONS]) {
+ nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
+ err = -EINVAL;
+ if (nla_type(tmp) != NFTA_LIST_ELEM)
+ goto err1;
+ if (n == NFT_RULE_MAXEXPRS)
+ goto err1;
+ err = nf_tables_expr_parse(&ctx, tmp, &info[n]);
+ if (err < 0)
+ goto err1;
+ size += info[n].ops->size;
+ n++;
+ }
+ }
+
+ err = -ENOMEM;
+ rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL);
+ if (rule == NULL)
+ goto err1;
+
+ nft_rule_activate_next(net, rule);
+
+ rule->handle = handle;
+ rule->dlen = size;
+
+ /* Second pass: initialise the expressions inside the rule */
+ expr = nft_expr_first(rule);
+ for (i = 0; i < n; i++) {
+ err = nf_tables_newexpr(&ctx, &info[i], expr);
+ if (err < 0)
+ goto err2;
+ /* Cleared so the err1 loop does not drop this module reference again */
+ info[i].ops = NULL;
+ expr = nft_expr_next(expr);
+ }
+
+ if (nlh->nlmsg_flags & NLM_F_REPLACE) {
+ if (nft_rule_is_active_next(net, old_rule)) {
+ /* Queue the old rule and deactivate it in the next generation */
+ repl = nf_tables_trans_add(old_rule, &ctx);
+ if (repl == NULL) {
+ err = -ENOMEM;
+ goto err2;
+ }
+ nft_rule_disactivate_next(net, old_rule);
+ list_add_tail(&rule->list, &old_rule->list);
+ } else {
+ err = -ENOENT;
+ goto err2;
+ }
+ } else if (nlh->nlmsg_flags & NLM_F_APPEND)
+ /* Append: after the anchor rule, or at the end of the chain */
+ if (old_rule)
+ list_add_rcu(&rule->list, &old_rule->list);
+ else
+ list_add_tail_rcu(&rule->list, &chain->rules);
+ else {
+ /* Insert: before the anchor rule, or at the head of the chain */
+ if (old_rule)
+ list_add_tail_rcu(&rule->list, &old_rule->list);
+ else
+ list_add_rcu(&rule->list, &chain->rules);
+ }
+
+ if (nf_tables_trans_add(rule, &ctx) == NULL) {
+ err = -ENOMEM;
+ goto err3;
+ }
+ return 0;
+
+err3:
+ /* Unlink the new rule; if a replacement was queued, undo that as well */
+ list_del_rcu(&rule->list);
+ if (repl) {
+ list_del_rcu(&repl->rule->list);
+ list_del(&repl->list);
+ nft_rule_clear(net, repl->rule);
+ kfree(repl);
+ }
+err2:
+ nf_tables_rule_destroy(rule);
+err1:
+ /* Drop module references of expressions that were parsed but not initialised */
+ for (i = 0; i < n; i++) {
+ if (info[i].ops != NULL)
+ module_put(info[i].ops->type->owner);
+ }
+ return err;
+}
+
+/*
+ * Record the removal of @rule: add it via nf_tables_trans_add() and mark it
+ * inactive in the next generation.
+ *
+ * Returns 0 on success, -ENOENT if the rule is not active in the next
+ * generation (the same rule cannot be deleted twice) and -ENOMEM if
+ * nf_tables_trans_add() returns NULL.
+ */
+static int
+nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
+{
+	if (!nft_rule_is_active_next(ctx->net, rule))
+		return -ENOENT;
+
+	if (nf_tables_trans_add(rule, ctx) == NULL)
+		return -ENOMEM;
+
+	nft_rule_disactivate_next(ctx->net, rule);
+	return 0;
+}
+
+/*
+ * NFT_MSG_DELRULE handler.
+ *
+ * Resolves the address family, table and chain named by the request.  With
+ * NFTA_RULE_HANDLE present only that rule is passed to
+ * nf_tables_delrule_one(); otherwise every rule of the chain is, stopping at
+ * the first failure.  Returns 0 or a negative errno.
+ */
+static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
+			     const struct nlmsghdr *nlh,
+			     const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	struct net *net = sock_net(skb->sk);
+	int family = nfmsg->nfgen_family;
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+	struct nft_chain *chain;
+	struct nft_rule *cur, *next;
+	struct nft_ctx ctx;
+	int ret = 0;
+
+	afi = nf_tables_afinfo_lookup(net, family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
+	if (IS_ERR(chain))
+		return PTR_ERR(chain);
+
+	nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+
+	/* Single rule addressed by handle */
+	if (nla[NFTA_RULE_HANDLE]) {
+		cur = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+		if (IS_ERR(cur))
+			return PTR_ERR(cur);
+
+		return nf_tables_delrule_one(&ctx, cur);
+	}
+
+	/* No handle given: remove all rules in this chain */
+	list_for_each_entry_safe(cur, next, &chain->rules, list) {
+		ret = nf_tables_delrule_one(&ctx, cur);
+		if (ret < 0)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Commit callback of the nf_tables nfnetlink subsystem.
+ *
+ * Bumps the generation counter, switches the generation cursor, waits for an
+ * RCU grace period and then walks net->nft.commit_list: rules that are active
+ * in the new generation are kept (NFT_MSG_NEWRULE is notified), all others
+ * are unlinked and destroyed (NFT_MSG_DELRULE is notified).  Always returns 0.
+ */
+static int nf_tables_commit(struct sk_buff *skb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nft_rule_trans *trans, *next;
+
+	/* Bump generation counter, invalidate any dump in progress */
+	net->nft.genctr++;
+
+	/* A new generation has just started */
+	net->nft.gencursor = gencursor_next(net);
+
+	/* Make sure all packets have left the previous generation before
+	 * purging old rules.
+	 */
+	synchronize_rcu();
+
+	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+		/* Delete this rule from the dirty list */
+		list_del(&trans->list);
+
+		if (nft_rule_is_active(net, trans->rule)) {
+			/* This rule was inactive in the past and just became
+			 * active.  Clear the next bit of the genmask since its
+			 * meaning has changed, now it is the future.
+			 */
+			nft_rule_clear(net, trans->rule);
+			nf_tables_rule_notify(skb, trans->nlh, trans->table,
+					      trans->chain, trans->rule,
+					      NFT_MSG_NEWRULE, 0,
+					      trans->family);
+		} else {
+			/* This rule is in the past, get rid of it */
+			list_del_rcu(&trans->rule->list);
+			nf_tables_rule_notify(skb, trans->nlh, trans->table,
+					      trans->chain, trans->rule,
+					      NFT_MSG_DELRULE, 0,
+					      trans->family);
+			nf_tables_rule_destroy(trans->rule);
+		}
+
+		kfree(trans);
+	}
+
+	return 0;
+}
+
+/*
+ * Abort callback of the nf_tables nfnetlink subsystem.
+ *
+ * Walks net->nft.commit_list and undoes every pending entry: a rule that is
+ * active in the next generation is unlinked and destroyed, any other rule
+ * only has its generation mask cleared.  Always returns 0.
+ */
+static int nf_tables_abort(struct sk_buff *skb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct nft_rule_trans *trans, *next;
+
+	list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+		/* Delete all rules from the dirty list */
+		list_del(&trans->list);
+
+		if (nft_rule_is_active_next(net, trans->rule)) {
+			/* This rule is inactive, get rid of it */
+			list_del_rcu(&trans->rule->list);
+			nf_tables_rule_destroy(trans->rule);
+		} else {
+			nft_rule_clear(net, trans->rule);
+		}
+
+		kfree(trans);
+	}
+
+	return 0;
+}
+
+/*
+ * Sets
+ */
+
+/* List of registered set implementations (struct nft_set_ops) */
+static LIST_HEAD(nf_tables_set_ops);
+
+/**
+ *	nft_register_set - register a set implementation
+ *
+ *	@ops: the set operations to register
+ *
+ *	Appends @ops to the list of set implementations while holding the
+ *	NFNL_SUBSYS_NFTABLES lock.  Always returns 0.
+ */
+int nft_register_set(struct nft_set_ops *ops)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_add_tail(&ops->list, &nf_tables_set_ops);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_register_set);
+
+/**
+ *	nft_unregister_set - unregister a set implementation
+ *
+ *	@ops: the set operations to unregister
+ *
+ *	Unlinks @ops from the list of set implementations while holding the
+ *	NFNL_SUBSYS_NFTABLES lock.
+ */
+void nft_unregister_set(struct nft_set_ops *ops)
+{
+	nfnl_lock(NFNL_SUBSYS_NFTABLES);
+	list_del(&ops->list);
+	nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+}
+EXPORT_SYMBOL_GPL(nft_unregister_set);
+
+/*
+ * Pick a registered set implementation for a new set.
+ *
+ * Only the NFT_SET_INTERVAL and NFT_SET_MAP bits of NFTA_SET_FLAGS take part
+ * in the selection: the first registered implementation whose features cover
+ * them, and whose module reference can be taken, is returned.
+ *
+ * With CONFIG_MODULES and an empty list, the subsystem lock is dropped to
+ * request the "nft-set" module; if that populated the list,
+ * ERR_PTR(-EAGAIN) is returned.  ERR_PTR(-EOPNOTSUPP) is returned when no
+ * implementation matches.
+ */
+static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[])
+{
+	const struct nft_set_ops *candidate;
+	u32 wanted = 0;
+
+#ifdef CONFIG_MODULES
+	if (list_empty(&nf_tables_set_ops)) {
+		nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+		request_module("nft-set");
+		nfnl_lock(NFNL_SUBSYS_NFTABLES);
+		if (!list_empty(&nf_tables_set_ops))
+			return ERR_PTR(-EAGAIN);
+	}
+#endif
+	if (nla[NFTA_SET_FLAGS] != NULL)
+		wanted = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])) &
+			 (NFT_SET_INTERVAL | NFT_SET_MAP);
+
+	// FIXME: implement selection properly
+	list_for_each_entry(candidate, &nf_tables_set_ops, list) {
+		if ((candidate->features & wanted) == wanted &&
+		    try_module_get(candidate->owner))
+			return candidate;
+	}
+
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { /* attribute policy for the NFT_MSG_{NEW,GET,DEL}SET messages */
+ [NFTA_SET_TABLE] = { .type = NLA_STRING }, /* name of the table holding the set */
+ [NFTA_SET_NAME] = { .type = NLA_STRING }, /* set name */
+ [NFTA_SET_FLAGS] = { .type = NLA_U32 }, /* NFT_SET_* flag bits, read as big-endian */
+ [NFTA_SET_KEY_TYPE] = { .type = NLA_U32 }, /* key data type, read as big-endian */
+ [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, /* key length, read as big-endian */
+ [NFTA_SET_DATA_TYPE] = { .type = NLA_U32 }, /* mapping data type, read as big-endian */
+ [NFTA_SET_DATA_LEN] = { .type = NLA_U32 }, /* mapping data length, read as big-endian */
+};
+
+/*
+ * Initialise @ctx for a set request.
+ *
+ * The address family comes from the nfgenmsg header.  The table is looked up
+ * only when NFTA_SET_TABLE is present; otherwise the context is initialised
+ * with a NULL table.  Returns 0 or the negative errno of the failed lookup.
+ */
+static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
+				     const struct sk_buff *skb,
+				     const struct nlmsghdr *nlh,
+				     const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	struct net *net = sock_net(skb->sk);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+
+	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	if (nla[NFTA_SET_TABLE] == NULL) {
+		table = NULL;
+	} else {
+		table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
+		if (IS_ERR(table))
+			return PTR_ERR(table);
+	}
+
+	nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
+	return 0;
+}
+
+/*
+ * Find the set of @table whose name equals the string attribute @nla.
+ *
+ * Returns the set, ERR_PTR(-EINVAL) when @nla is NULL or ERR_PTR(-ENOENT)
+ * when no set of that name exists.  @table must not be NULL.
+ */
+struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
+				     const struct nlattr *nla)
+{
+	struct nft_set *cur;
+
+	if (!nla)
+		return ERR_PTR(-EINVAL);
+
+	list_for_each_entry(cur, &table->sets, list) {
+		if (nla_strcmp(nla, cur->name) == 0)
+			return cur;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+/*
+ * Fill in set->name from the requested @name.
+ *
+ * @name may contain exactly one "%d" and no other '%'.  In that case the
+ * lowest number not yet used by a set of ctx->table matching the same
+ * pattern is substituted.  Used numbers are tracked in a bitmap of one
+ * zeroed page, i.e. BITS_PER_BYTE * PAGE_SIZE bits; numbers outside that
+ * range are ignored.
+ *
+ * Returns 0 on success, -EINVAL for a malformed pattern, -ENOMEM if the
+ * bitmap page cannot be allocated and -ENFILE if the resulting name is
+ * already taken by another set of the table.
+ */
+static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
+				    const char *name)
+{
+	const struct nft_set *i;
+	const char *p;
+	unsigned long *inuse;
+	unsigned int n = 0;
+
+	p = strnchr(name, IFNAMSIZ, '%');
+	if (p != NULL) {
+		/* Only a single "%d" conversion is accepted, since @name is
+		 * used as a format string below.
+		 */
+		if (p[1] != 'd' || strchr(p + 2, '%'))
+			return -EINVAL;
+
+		inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+		if (inuse == NULL)
+			return -ENOMEM;
+
+		list_for_each_entry(i, &ctx->table->sets, list) {
+			/* "%d" stores a signed int */
+			int tmp = 0;
+
+			if (sscanf(i->name, name, &tmp) != 1)
+				continue;
+			/* The bitmap is one page: valid bit numbers are
+			 * 0 .. BITS_PER_BYTE * PAGE_SIZE - 1.  Anything else
+			 * would make set_bit() write outside the page.
+			 */
+			if (tmp < 0 || tmp >= BITS_PER_BYTE * PAGE_SIZE)
+				continue;
+			set_bit(tmp, inuse);
+		}
+
+		n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE);
+		free_page((unsigned long)inuse);
+	}
+
+	snprintf(set->name, sizeof(set->name), name, n);
+	list_for_each_entry(i, &ctx->table->sets, list) {
+		if (!strcmp(set->name, i->name))
+			return -ENFILE;
+	}
+	return 0;
+}
+
+/*
+ * Append one set description message to @skb.
+ *
+ * The message carries the table name, the set name, the flags (only when
+ * non-zero), the key type and length and, for sets flagged NFT_SET_MAP, the
+ * data type and length.  Port id and sequence number are taken from the
+ * request stored in @ctx.
+ *
+ * Returns the result of nlmsg_end() on success.  If anything does not fit
+ * the message is trimmed off again and -1 is returned.
+ */
+static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
+			      const struct nft_set *set, u16 event, u16 flags)
+{
+	u32 portid = NETLINK_CB(ctx->skb).portid;
+	u32 seq = ctx->nlh->nlmsg_seq;
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *hdr;
+
+	event |= NFNL_SUBSYS_NFTABLES << 8;
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
+			flags);
+	if (nlh == NULL)
+		goto nla_put_failure;
+
+	hdr = nlmsg_data(nlh);
+	hdr->nfgen_family = ctx->afi->family;
+	hdr->version = NFNETLINK_V0;
+	hdr->res_id = 0;
+
+	if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name) ||
+	    nla_put_string(skb, NFTA_SET_NAME, set->name))
+		goto nla_put_failure;
+
+	if (set->flags != 0 &&
+	    nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags)))
+		goto nla_put_failure;
+
+	if (nla_put_be32(skb, NFTA_SET_KEY_TYPE, htonl(set->ktype)) ||
+	    nla_put_be32(skb, NFTA_SET_KEY_LEN, htonl(set->klen)))
+		goto nla_put_failure;
+
+	if (set->flags & NFT_SET_MAP &&
+	    (nla_put_be32(skb, NFTA_SET_DATA_TYPE, htonl(set->dtype)) ||
+	     nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen))))
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_trim(skb, nlh);
+	return -1;
+}
+
+/*
+ * Send an @event notification about @set to the NFNLGRP_NFTABLES group.
+ *
+ * Nothing is sent (and 0 is returned) when the request did not ask for a
+ * report and the group has no listeners.  On failure the error is also
+ * recorded through nfnetlink_set_err() before it is returned.
+ */
+static int nf_tables_set_notify(const struct nft_ctx *ctx,
+				const struct nft_set *set,
+				int event)
+{
+	u32 portid = NETLINK_CB(ctx->skb).portid;
+	bool report = nlmsg_report(ctx->nlh);
+	struct sk_buff *msg;
+	int ret;
+
+	if (!(report || nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)))
+		return 0;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (msg == NULL) {
+		ret = -ENOBUFS;
+		goto fail;
+	}
+
+	ret = nf_tables_fill_set(msg, ctx, set, event, 0);
+	if (ret < 0) {
+		kfree_skb(msg);
+		goto fail;
+	}
+
+	ret = nfnetlink_send(msg, ctx->net, portid, NFNLGRP_NFTABLES, report,
+			     GFP_KERNEL);
+	if (ret >= 0)
+		return ret;
+fail:
+	/* ret is negative on every path that reaches this label */
+	nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, ret);
+	return ret;
+}
+
+/*
+ * Dump the sets of ctx->table into @skb.
+ *
+ * cb->args[0] holds the index of the first set still to be dumped and
+ * cb->args[1] is set once every set has been written.  Returns skb->len.
+ */
+static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb,
+				     struct netlink_callback *cb)
+{
+	unsigned int pos = 0, start = cb->args[0];
+	const struct nft_set *set;
+
+	if (cb->args[1])
+		return skb->len;
+
+	list_for_each_entry(set, &ctx->table->sets, list) {
+		if (pos >= start &&
+		    nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
+				       NLM_F_MULTI) < 0) {
+			/* Out of room: resume from this set next time */
+			cb->args[0] = pos;
+			return skb->len;
+		}
+		pos++;
+	}
+
+	cb->args[1] = 1;
+	return skb->len;
+}
+
+/*
+ * Dump the sets of every table of ctx->afi into @skb.
+ *
+ * Resume state kept in the netlink callback:
+ *   cb->args[0]  index, within its table, of the first set still to dump
+ *   cb->args[1]  non-zero once the dump is complete
+ *   cb->args[2]  table in which the previous pass stopped
+ *
+ * On resume, tables preceding the recorded one are skipped.  The recorded
+ * table is continued from the recorded index and every following table is
+ * dumped from its first set.  Returns skb->len.
+ */
+static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	const struct nft_set *set;
+	unsigned int idx, s_idx = cb->args[0];
+	struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
+
+	if (cb->args[1])
+		return skb->len;
+
+	list_for_each_entry(table, &ctx->afi->tables, list) {
+		if (cur_table) {
+			if (cur_table != table)
+				continue;
+
+			/* Found the resume point: stop skipping, otherwise
+			 * all tables after this one would never be dumped.
+			 */
+			cur_table = NULL;
+		}
+
+		ctx->table = table;
+		/* The saved index is relative to the table it was saved for */
+		idx = 0;
+		list_for_each_entry(set, &ctx->table->sets, list) {
+			if (idx < s_idx)
+				goto cont;
+			if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
+					       NLM_F_MULTI) < 0) {
+				cb->args[0] = idx;
+				cb->args[2] = (unsigned long) table;
+				goto done;
+			}
+cont:
+			idx++;
+		}
+		/* Tables after the resumed one start from their first set */
+		s_idx = 0;
+	}
+	cb->args[1] = 1;
+done:
+	return skb->len;
+}
+
+/*
+ * Netlink dump callback for NFT_MSG_GETSET.
+ *
+ * Re-parses the attributes of the original request and dumps either the sets
+ * of the requested table or, when no table was given, the sets of all tables
+ * of the address family.
+ */
+static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+	struct nlattr *tb[NFTA_SET_MAX + 1];
+	struct nft_ctx ctx;
+	int err;
+
+	err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), tb, NFTA_SET_MAX,
+			  nft_set_policy);
+	if (err < 0)
+		return err;
+
+	err = nft_ctx_init_from_setattr(&ctx, cb->skb, cb->nlh, (void *)tb);
+	if (err < 0)
+		return err;
+
+	if (ctx.table != NULL)
+		return nf_tables_dump_sets_table(&ctx, skb, cb);
+
+	return nf_tables_dump_sets_all(&ctx, skb, cb);
+}
+
+/*
+ * NFT_MSG_GETSET handler.
+ *
+ * With NLM_F_DUMP a dump using nf_tables_dump_sets() is started.  Otherwise
+ * the single set named by NFTA_SET_TABLE/NFTA_SET_NAME is looked up and its
+ * description is unicast back to the requesting port.
+ *
+ * Returns 0 or a negative errno: -EINVAL when a non-dump request carries no
+ * table, -ENOMEM when the reply cannot be allocated, or the error of the
+ * failed lookup/fill.
+ */
+static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
+			    const struct nlmsghdr *nlh,
+			    const struct nlattr * const nla[])
+{
+	const struct nft_set *set;
+	struct nft_ctx ctx;
+	struct sk_buff *skb2;
+	int err;
+
+	/* Verify existence before starting dump */
+	err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
+	if (err < 0)
+		return err;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = nf_tables_dump_sets,
+		};
+		return netlink_dump_start(nlsk, skb, nlh, &c);
+	}
+
+	/* Without NFTA_SET_TABLE the context holds a NULL table, which
+	 * nf_tables_set_lookup() would dereference.
+	 */
+	if (nla[NFTA_SET_TABLE] == NULL)
+		return -EINVAL;
+
+	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
+	if (IS_ERR(set))
+		return PTR_ERR(set);
+
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (skb2 == NULL)
+		return -ENOMEM;
+
+	err = nf_tables_fill_set(skb2, &ctx, set, NFT_MSG_NEWSET, 0);
+	if (err < 0)
+		goto err;
+
+	return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+
+err:
+	kfree_skb(skb2);
+	return err;
+}
+
+/*
+ * NFT_MSG_NEWSET handler: create a set in the table named by NFTA_SET_TABLE.
+ *
+ * NFTA_SET_TABLE, NFTA_SET_NAME and NFTA_SET_KEY_LEN are mandatory.  The key
+ * type defaults to NFT_DATA_VALUE.  A data type may only be given together
+ * with the NFT_SET_MAP flag and is mandatory for it; for data types other
+ * than NFT_DATA_VERDICT a data length is required as well.
+ *
+ * If a set of that name already exists the request fails with -EEXIST
+ * (NLM_F_EXCL) or -EOPNOTSUPP (NLM_F_REPLACE) and otherwise succeeds without
+ * changing anything.  A new set is only created when NLM_F_CREATE is given.
+ *
+ * Returns 0 or a negative errno.
+ */
+static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
+			    const struct nlmsghdr *nlh,
+			    const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	const struct nft_set_ops *ops;
+	const struct nft_af_info *afi;
+	struct net *net = sock_net(skb->sk);
+	struct nft_table *table;
+	struct nft_set *set;
+	struct nft_ctx ctx;
+	char name[IFNAMSIZ];
+	unsigned int size;
+	bool create;
+	u32 ktype, klen, dlen, dtype, flags;
+	int err;
+
+	if (nla[NFTA_SET_TABLE] == NULL ||
+	    nla[NFTA_SET_NAME] == NULL ||
+	    nla[NFTA_SET_KEY_LEN] == NULL)
+		return -EINVAL;
+
+	ktype = NFT_DATA_VALUE;
+	if (nla[NFTA_SET_KEY_TYPE] != NULL) {
+		ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
+		if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
+			return -EINVAL;
+	}
+
+	klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
+	if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data))
+		return -EINVAL;
+
+	flags = 0;
+	if (nla[NFTA_SET_FLAGS] != NULL) {
+		flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
+		if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
+			      NFT_SET_INTERVAL | NFT_SET_MAP))
+			return -EINVAL;
+	}
+
+	dtype = 0;
+	dlen = 0;
+	if (nla[NFTA_SET_DATA_TYPE] != NULL) {
+		if (!(flags & NFT_SET_MAP))
+			return -EINVAL;
+
+		dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
+		if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
+		    dtype != NFT_DATA_VERDICT)
+			return -EINVAL;
+
+		if (dtype != NFT_DATA_VERDICT) {
+			if (nla[NFTA_SET_DATA_LEN] == NULL)
+				return -EINVAL;
+			dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
+			if (dlen == 0 ||
+			    dlen > FIELD_SIZEOF(struct nft_data, data))
+				return -EINVAL;
+		} else
+			dlen = sizeof(struct nft_data);
+	} else if (flags & NFT_SET_MAP)
+		return -EINVAL;
+
+	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
+
+	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+
+	set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]);
+	if (IS_ERR(set)) {
+		if (PTR_ERR(set) != -ENOENT)
+			return PTR_ERR(set);
+		set = NULL;
+	}
+
+	if (set != NULL) {
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EOPNOTSUPP;
+		return 0;
+	}
+
+	if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+		return -ENOENT;
+
+	/* On success this holds a reference on ops->owner, dropped at err1 */
+	ops = nft_select_set_ops(nla);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+
+	/* Implementation private data is allocated right behind the set */
+	size = 0;
+	if (ops->privsize != NULL)
+		size = ops->privsize(nla);
+
+	err = -ENOMEM;
+	set = kzalloc(sizeof(*set) + size, GFP_KERNEL);
+	if (set == NULL)
+		goto err1;
+
+	/* Bound the copy by the buffer that is actually written to */
+	nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(name));
+	err = nf_tables_set_alloc_name(&ctx, set, name);
+	if (err < 0)
+		goto err2;
+
+	INIT_LIST_HEAD(&set->bindings);
+	set->ops = ops;
+	set->ktype = ktype;
+	set->klen = klen;
+	set->dtype = dtype;
+	set->dlen = dlen;
+	set->flags = flags;
+
+	err = ops->init(set, nla);
+	if (err < 0)
+		goto err2;
+
+	list_add_tail(&set->list, &table->sets);
+	/* The notification result does not affect the outcome */
+	nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET);
+	return 0;
+
+err2:
+	kfree(set);
+err1:
+	module_put(ops->owner);
+	return err;
+}
+
+/*
+ * Unlink and free @set.
+ *
+ * A NFT_MSG_DELSET notification is sent unless the set is flagged
+ * NFT_SET_ANONYMOUS.  Afterwards the implementation's destroy hook runs, the
+ * module reference of the set ops is dropped and the set is freed.
+ */
+static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
+{
+	bool anonymous = set->flags & NFT_SET_ANONYMOUS;
+
+	list_del(&set->list);
+
+	if (!anonymous)
+		nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
+
+	set->ops->destroy(set);
+	module_put(set->ops->owner);
+	kfree(set);
+}
+
+/*
+ * NFT_MSG_DELSET handler.
+ *
+ * Requires NFTA_SET_TABLE.  The set named by NFTA_SET_NAME is destroyed
+ * unless it still has bindings, in which case -EBUSY is returned.
+ */
+static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
+			    const struct nlmsghdr *nlh,
+			    const struct nlattr * const nla[])
+{
+	struct nft_ctx ctx;
+	struct nft_set *victim;
+	int ret;
+
+	if (!nla[NFTA_SET_TABLE])
+		return -EINVAL;
+
+	ret = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
+	if (ret < 0)
+		return ret;
+
+	victim = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
+	if (IS_ERR(victim))
+		return PTR_ERR(victim);
+
+	/* A set that is still bound cannot go away */
+	if (!list_empty(&victim->bindings))
+		return -EBUSY;
+
+	nf_tables_set_destroy(&ctx, victim);
+	return 0;
+}
+
+/*
+ * Set walk callback used while binding a map: validate loading the data of
+ * @elem into the register that corresponds to the set's data type.
+ */
+static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
+					const struct nft_set *set,
+					const struct nft_set_iter *iter,
+					const struct nft_set_elem *elem)
+{
+	return nft_validate_data_load(ctx, nft_type_to_reg(set->dtype),
+				      &elem->data, set->dtype);
+}
+
+/*
+ * Bind @set to the chain of @ctx through @binding.
+ *
+ * An anonymous set can only be bound once (-EBUSY otherwise).  For maps all
+ * elements are validated through nf_tables_bind_check_setelem() first; if
+ * that fails the walk error is returned and an anonymous set is destroyed.
+ * On success the binding records ctx->chain and is appended to the set's
+ * binding list.
+ */
+int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
+		       struct nft_set_binding *binding)
+{
+	struct nft_set_binding *cur;
+	struct nft_set_iter iter;
+
+	if (set->flags & NFT_SET_ANONYMOUS && !list_empty(&set->bindings))
+		return -EBUSY;
+
+	if (!(set->flags & NFT_SET_MAP))
+		goto bind;
+
+	/* If the set is already bound to the same chain all
+	 * jumps are already validated for that chain.
+	 *
+	 * NOTE(review): binding->chain is only assigned below, so this
+	 * compares against whatever the caller left in it - confirm that
+	 * is intended.
+	 */
+	list_for_each_entry(cur, &set->bindings, list) {
+		if (cur->chain == binding->chain)
+			goto bind;
+	}
+
+	iter.skip = 0;
+	iter.count = 0;
+	iter.err = 0;
+	iter.fn = nf_tables_bind_check_setelem;
+
+	set->ops->walk(ctx, set, &iter);
+	if (iter.err < 0) {
+		/* Destroy anonymous sets if binding fails */
+		if (set->flags & NFT_SET_ANONYMOUS)
+			nf_tables_set_destroy(ctx, set);
+
+		return iter.err;
+	}
+
+bind:
+	binding->chain = ctx->chain;
+	list_add_tail(&binding->list, &set->bindings);
+	return 0;
+}
+
+/*
+ * Remove @binding from @set.  An anonymous set is destroyed once its last
+ * binding is gone.
+ */
+void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
+			  struct nft_set_binding *binding)
+{
+	list_del(&binding->list);
+
+	if (!list_empty(&set->bindings))
+		return;
+
+	if (set->flags & NFT_SET_ANONYMOUS)
+		nf_tables_set_destroy(ctx, set);
+}
+
+/*
+ * Set elements
+ */
+
+static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { /* attribute policy for a single set element */
+ [NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED }, /* element key, parsed with nft_data_init() */
+ [NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED }, /* mapping data, parsed with nft_data_init() */
+ [NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 }, /* element flags, read as big-endian */
+};
+
+static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { /* attribute policy for the NFT_MSG_{NEW,GET,DEL}SETELEM messages */
+ [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING }, /* name of the table holding the set */
+ [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING }, /* name of the set */
+ [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED }, /* nested list of elements */
+};
+
+/*
+ * Initialise @ctx for a set element request: the address family comes from
+ * the nfgenmsg header, the table from NFTA_SET_ELEM_LIST_TABLE.  Returns 0
+ * or the negative errno of the failed lookup.
+ */
+static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
+				      const struct sk_buff *skb,
+				      const struct nlmsghdr *nlh,
+				      const struct nlattr * const nla[])
+{
+	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	struct net *net = sock_net(skb->sk);
+	const struct nft_af_info *afi;
+	const struct nft_table *table;
+
+	afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
+	if (IS_ERR(afi))
+		return PTR_ERR(afi);
+
+	table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
+
+	return 0;
+}
+
+/*
+ * Append one set element, nested in a NFTA_LIST_ELEM attribute, to @skb.
+ *
+ * The key is always dumped.  The data is dumped for maps unless the element
+ * is flagged NFT_SET_ELEM_INTERVAL_END; the flags are dumped when non-zero.
+ * Returns 0, or -EMSGSIZE after trimming @skb back to its previous tail.
+ */
+static int nf_tables_fill_setelem(struct sk_buff *skb,
+				  const struct nft_set *set,
+				  const struct nft_set_elem *elem)
+{
+	unsigned char *mark = skb_tail_pointer(skb);
+	struct nlattr *nest;
+
+	nest = nla_nest_start(skb, NFTA_LIST_ELEM);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE,
+			  set->klen) < 0)
+		goto nla_put_failure;
+
+	if (set->flags & NFT_SET_MAP &&
+	    !(elem->flags & NFT_SET_ELEM_INTERVAL_END)) {
+		int dtype = NFT_DATA_VALUE;
+
+		if (set->dtype == NFT_DATA_VERDICT)
+			dtype = NFT_DATA_VERDICT;
+
+		if (nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data,
+				  dtype, set->dlen) < 0)
+			goto nla_put_failure;
+	}
+
+	if (elem->flags != 0 &&
+	    nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags)))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	nlmsg_trim(skb, mark);
+	return -EMSGSIZE;
+}
+
+struct nft_set_dump_args { /* state handed to the set walk while dumping elements */
+ const struct netlink_callback *cb; /* callback of the running dump */
+ struct nft_set_iter iter; /* embedded iterator; the walk callback recovers this struct from it via container_of() */
+ struct sk_buff *skb; /* message buffer the elements are written to */
+};
+
+/*
+ * Set walk callback for element dumps: recover the dump arguments that embed
+ * @iter and append @elem to their skb.
+ */
+static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
+				  const struct nft_set *set,
+				  const struct nft_set_iter *iter,
+				  const struct nft_set_elem *elem)
+{
+	struct nft_set_dump_args *dump;
+
+	dump = container_of(iter, struct nft_set_dump_args, iter);
+
+	return nf_tables_fill_setelem(dump->skb, set, elem);
+}
+
+/*
+ * Netlink dump callback for NFT_MSG_GETSETELEM.
+ *
+ * Re-parses the original request, looks the set up again and writes one
+ * NFT_MSG_NEWSETELEM message with the table name, the set name and a nested
+ * list of elements.  cb->args[0] carries the number of elements handled so
+ * far and is passed to the walk as the number of elements to skip.
+ *
+ * Returns skb->len while progress is made, 0 when the walk did not advance,
+ * -ENOSPC when the message header or names do not fit, or a walk error other
+ * than -EMSGSIZE.
+ */
+static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nlattr *tb[NFTA_SET_ELEM_LIST_MAX + 1];
+	struct nft_set_dump_args args;
+	const struct nft_set *set;
+	struct nft_ctx ctx;
+	struct nfgenmsg *nfmsg;
+	struct nlmsghdr *nlh;
+	struct nlattr *nest;
+	u32 portid, seq;
+	int event, err;
+
+	nfmsg = nlmsg_data(cb->nlh);
+	err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), tb, NFTA_SET_ELEM_LIST_MAX,
+			  nft_set_elem_list_policy);
+	if (err < 0)
+		return err;
+
+	err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)tb);
+	if (err < 0)
+		return err;
+
+	set = nf_tables_set_lookup(ctx.table, tb[NFTA_SET_ELEM_LIST_SET]);
+	if (IS_ERR(set))
+		return PTR_ERR(set);
+
+	event = NFT_MSG_NEWSETELEM | (NFNL_SUBSYS_NFTABLES << 8);
+	portid = NETLINK_CB(cb->skb).portid;
+	seq = cb->nlh->nlmsg_seq;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
+			NLM_F_MULTI);
+	if (nlh == NULL)
+		return -ENOSPC;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family = NFPROTO_UNSPEC;
+	nfmsg->version = NFNETLINK_V0;
+	nfmsg->res_id = 0;
+
+	if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name) ||
+	    nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name))
+		return -ENOSPC;
+
+	nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
+	if (nest == NULL)
+		return -ENOSPC;
+
+	args.cb = cb;
+	args.skb = skb;
+	args.iter.skip = cb->args[0];
+	args.iter.count = 0;
+	args.iter.err = 0;
+	args.iter.fn = nf_tables_dump_setelem;
+	set->ops->walk(&ctx, set, &args.iter);
+
+	nla_nest_end(skb, nest);
+	nlmsg_end(skb, nlh);
+
+	/* -EMSGSIZE only means the skb is full; the dump continues later */
+	if (args.iter.err != 0 && args.iter.err != -EMSGSIZE)
+		return args.iter.err;
+
+	/* No progress since the previous pass: the dump is finished */
+	if (args.iter.count == cb->args[0])
+		return 0;
+
+	cb->args[0] = args.iter.count;
+	return skb->len;
+}
+
+/*
+ * NFT_MSG_GETSETELEM handler.
+ *
+ * Verifies that the table and set exist and then starts a dump using
+ * nf_tables_dump_set().  Requests without NLM_F_DUMP are answered with
+ * -EOPNOTSUPP.
+ */
+static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
+				const struct nlmsghdr *nlh,
+				const struct nlattr * const nla[])
+{
+	struct netlink_dump_control c = {
+		.dump = nf_tables_dump_set,
+	};
+	const struct nft_set *set;
+	struct nft_ctx ctx;
+	int ret;
+
+	ret = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+	if (ret < 0)
+		return ret;
+
+	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+	if (IS_ERR(set))
+		return PTR_ERR(set);
+
+	if (!(nlh->nlmsg_flags & NLM_F_DUMP))
+		return -EOPNOTSUPP;
+
+	return netlink_dump_start(nlsk, skb, nlh, &c);
+}
+
+/*
+ * Parse one element attribute and insert the element into @set.
+ *
+ * The key is mandatory and must be a NFT_DATA_VALUE of the set's key length.
+ * Maps require data unless the element is flagged NFT_SET_ELEM_INTERVAL_END;
+ * plain sets must not carry data.  Data is validated against every chain
+ * the set is currently bound to.
+ *
+ * Returns 0, -EEXIST if the key is already present, -EINVAL for malformed
+ * input or the error of the failing helper.
+ */
+static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
+			    const struct nlattr *attr)
+{
+	struct nlattr *tb[NFTA_SET_ELEM_MAX + 1];
+	struct nft_data_desc key_desc, data_desc;
+	struct nft_set_binding *binding;
+	struct nft_set_elem elem;
+	enum nft_registers dreg;
+	bool has_data;
+	int err;
+
+	err = nla_parse_nested(tb, NFTA_SET_ELEM_MAX, attr,
+			       nft_set_elem_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_SET_ELEM_KEY] == NULL)
+		return -EINVAL;
+
+	elem.flags = 0;
+	if (tb[NFTA_SET_ELEM_FLAGS] != NULL) {
+		elem.flags = ntohl(nla_get_be32(tb[NFTA_SET_ELEM_FLAGS]));
+		if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END)
+			return -EINVAL;
+	}
+
+	has_data = tb[NFTA_SET_ELEM_DATA] != NULL;
+	if (set->flags & NFT_SET_MAP) {
+		if (!has_data && !(elem.flags & NFT_SET_ELEM_INTERVAL_END))
+			return -EINVAL;
+	} else if (has_data) {
+		return -EINVAL;
+	}
+
+	err = nft_data_init(ctx, &elem.key, &key_desc, tb[NFTA_SET_ELEM_KEY]);
+	if (err < 0)
+		return err;
+
+	if (key_desc.type != NFT_DATA_VALUE || key_desc.len != set->klen) {
+		err = -EINVAL;
+		goto free_key;
+	}
+
+	if (set->ops->get(set, &elem) == 0) {
+		err = -EEXIST;
+		goto free_key;
+	}
+
+	if (has_data) {
+		err = nft_data_init(ctx, &elem.data, &data_desc,
+				    tb[NFTA_SET_ELEM_DATA]);
+		if (err < 0)
+			goto free_key;
+
+		if (set->dtype != NFT_DATA_VERDICT &&
+		    data_desc.len != set->dlen) {
+			err = -EINVAL;
+			goto free_data;
+		}
+
+		/* Check the new data against every chain using the set */
+		dreg = nft_type_to_reg(set->dtype);
+		list_for_each_entry(binding, &set->bindings, list) {
+			struct nft_ctx bind_ctx = {
+				.afi	= ctx->afi,
+				.table	= ctx->table,
+				.chain	= binding->chain,
+			};
+
+			err = nft_validate_data_load(&bind_ctx, dreg,
+						     &elem.data,
+						     data_desc.type);
+			if (err < 0)
+				goto free_data;
+		}
+	}
+
+	err = set->ops->insert(set, &elem);
+	if (err < 0)
+		goto free_data;
+
+	return 0;
+
+free_data:
+	if (has_data)
+		nft_data_uninit(&elem.data, data_desc.type);
+free_key:
+	nft_data_uninit(&elem.key, key_desc.type);
+	return err;
+}
+
+/*
+ * NFT_MSG_NEWSETELEM handler: add every element listed in
+ * NFTA_SET_ELEM_LIST_ELEMENTS to the named set.
+ *
+ * Returns 0, -EINVAL when the element list is missing, -EBUSY when the set
+ * is constant and already bound, or the error of the first element that
+ * could not be added (elements added before it stay in the set).
+ */
+static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
+				const struct nlmsghdr *nlh,
+				const struct nlattr * const nla[])
+{
+	const struct nlattr *attr;
+	struct nft_set *set;
+	struct nft_ctx ctx;
+	int rem, err;
+
+	/* nla_for_each_nested() below must not be handed a NULL attribute */
+	if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
+		return -EINVAL;
+
+	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+	if (err < 0)
+		return err;
+
+	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+	if (IS_ERR(set))
+		return PTR_ERR(set);
+	if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
+		return -EBUSY;
+
+	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+		err = nft_add_set_elem(&ctx, set, attr);
+		if (err < 0)
+			return err;
+	}
+	return 0;
+}
+
+/*
+ * Parse one element attribute and remove the matching element from @set.
+ *
+ * The key is mandatory and must be a NFT_DATA_VALUE of the set's key length.
+ * The element is fetched with the set's get operation and then removed; for
+ * maps its data is released afterwards.  The parsed key is released exactly
+ * once on every path that initialised it.
+ *
+ * Returns the (non-negative) result of the get operation on success or a
+ * negative errno.
+ */
+static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
+			   const struct nlattr *attr)
+{
+	struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
+	struct nft_data_desc desc;
+	struct nft_set_elem elem;
+	int err;
+
+	err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
+			       nft_set_elem_policy);
+	if (err < 0)
+		goto err1;
+
+	err = -EINVAL;
+	if (nla[NFTA_SET_ELEM_KEY] == NULL)
+		goto err1;
+
+	err = nft_data_init(ctx, &elem.key, &desc, nla[NFTA_SET_ELEM_KEY]);
+	if (err < 0)
+		goto err1;
+
+	err = -EINVAL;
+	if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
+		goto err2;
+
+	err = set->ops->get(set, &elem);
+	if (err < 0)
+		goto err2;
+
+	set->ops->remove(set, &elem);
+
+	if (set->flags & NFT_SET_MAP)
+		nft_data_uninit(&elem.data, set->dtype);
+
+	/* Success falls through: the key is released below, only once */
+err2:
+	nft_data_uninit(&elem.key, desc.type);
+err1:
+	return err;
+}
+
+/*
+ * NFT_MSG_DELSETELEM handler: remove every element listed in
+ * NFTA_SET_ELEM_LIST_ELEMENTS from the named set.
+ *
+ * Returns 0, -EINVAL when the element list is missing, -EBUSY when the set
+ * is constant and already bound, or the error of the first element that
+ * could not be removed (elements removed before it stay removed).
+ */
+static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
+				const struct nlmsghdr *nlh,
+				const struct nlattr * const nla[])
+{
+	const struct nlattr *attr;
+	struct nft_set *set;
+	struct nft_ctx ctx;
+	int rem, err;
+
+	/* nla_for_each_nested() below must not be handed a NULL attribute */
+	if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
+		return -EINVAL;
+
+	err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
+	if (err < 0)
+		return err;
+
+	set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
+	if (IS_ERR(set))
+		return PTR_ERR(set);
+	if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
+		return -EBUSY;
+
+	nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+		err = nft_del_setelem(&ctx, set, attr);
+		if (err < 0)
+			return err;
+	}
+	return 0;
+}
+
+static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { /* message handlers, indexed by NFT_MSG_* type, with the attribute policy used to parse each message */
+ [NFT_MSG_NEWTABLE] = {
+ .call = nf_tables_newtable,
+ .attr_count = NFTA_TABLE_MAX,
+ .policy = nft_table_policy,
+ },
+ [NFT_MSG_GETTABLE] = {
+ .call = nf_tables_gettable,
+ .attr_count = NFTA_TABLE_MAX,
+ .policy = nft_table_policy,
+ },
+ [NFT_MSG_DELTABLE] = {
+ .call = nf_tables_deltable,
+ .attr_count = NFTA_TABLE_MAX,
+ .policy = nft_table_policy,
+ },
+ [NFT_MSG_NEWCHAIN] = {
+ .call = nf_tables_newchain,
+ .attr_count = NFTA_CHAIN_MAX,
+ .policy = nft_chain_policy,
+ },
+ [NFT_MSG_GETCHAIN] = {
+ .call = nf_tables_getchain,
+ .attr_count = NFTA_CHAIN_MAX,
+ .policy = nft_chain_policy,
+ },
+ [NFT_MSG_DELCHAIN] = {
+ .call = nf_tables_delchain,
+ .attr_count = NFTA_CHAIN_MAX,
+ .policy = nft_chain_policy,
+ },
+ [NFT_MSG_NEWRULE] = {
+ .call_batch = nf_tables_newrule, /* rule updates use the batch hook, unlike the other messages */
+ .attr_count = NFTA_RULE_MAX,
+ .policy = nft_rule_policy,
+ },
+ [NFT_MSG_GETRULE] = {
+ .call = nf_tables_getrule,
+ .attr_count = NFTA_RULE_MAX,
+ .policy = nft_rule_policy,
+ },
+ [NFT_MSG_DELRULE] = {
+ .call_batch = nf_tables_delrule, /* rule updates use the batch hook, unlike the other messages */
+ .attr_count = NFTA_RULE_MAX,
+ .policy = nft_rule_policy,
+ },
+ [NFT_MSG_NEWSET] = {
+ .call = nf_tables_newset,
+ .attr_count = NFTA_SET_MAX,
+ .policy = nft_set_policy,
+ },
+ [NFT_MSG_GETSET] = {
+ .call = nf_tables_getset,
+ .attr_count = NFTA_SET_MAX,
+ .policy = nft_set_policy,
+ },
+ [NFT_MSG_DELSET] = {
+ .call = nf_tables_delset,
+ .attr_count = NFTA_SET_MAX,
+ .policy = nft_set_policy,
+ },
+ [NFT_MSG_NEWSETELEM] = {
+ .call = nf_tables_newsetelem,
+ .attr_count = NFTA_SET_ELEM_LIST_MAX,
+ .policy = nft_set_elem_list_policy,
+ },
+ [NFT_MSG_GETSETELEM] = {
+ .call = nf_tables_getsetelem,
+ .attr_count = NFTA_SET_ELEM_LIST_MAX,
+ .policy = nft_set_elem_list_policy,
+ },
+ [NFT_MSG_DELSETELEM] = {
+ .call = nf_tables_delsetelem,
+ .attr_count = NFTA_SET_ELEM_LIST_MAX,
+ .policy = nft_set_elem_list_policy,
+ },
+};
+
+static const struct nfnetlink_subsystem nf_tables_subsys = { /* nfnetlink subsystem description for nf_tables */
+ .name = "nf_tables",
+ .subsys_id = NFNL_SUBSYS_NFTABLES,
+ .cb_count = NFT_MSG_MAX,
+ .cb = nf_tables_cb, /* per-message handlers */
+ .commit = nf_tables_commit, /* applies the entries queued on net->nft.commit_list */
+ .abort = nf_tables_abort, /* undoes the entries queued on net->nft.commit_list */
+};
+
+/*
+ * Loop detection - walk through the ruleset beginning at the destination chain
+ * of a new jump until either the source chain is reached (loop) or all
+ * reachable chains have been traversed.
+ *
+ * The loop check is performed whenever a new jump verdict is added to an
+ * expression or verdict map or a verdict map is bound to a new chain.
+ */
+
+static int nf_tables_check_loops(const struct nft_ctx *ctx,
+				 const struct nft_chain *chain);
+
+/*
+ * Set walk callback for loop detection: follow the element's verdict if it
+ * is a jump or goto, otherwise there is nothing to check.
+ */
+static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
+					const struct nft_set *set,
+					const struct nft_set_iter *iter,
+					const struct nft_set_elem *elem)
+{
+	if (elem->data.verdict == NFT_JUMP ||
+	    elem->data.verdict == NFT_GOTO)
+		return nf_tables_check_loops(ctx, elem->data.chain);
+
+	return 0;
+}
+
+/*
+ * Recursively check whether @chain can reach ctx->chain.
+ *
+ * Every expression of every rule in @chain that provides a validate hook is
+ * asked for its data; jump/goto verdicts are followed.  Verdict maps of
+ * ctx->table that are bound to @chain are walked as well.
+ *
+ * Returns -ELOOP when ctx->chain is reached, another negative errno when a
+ * validate hook or set walk fails, and 0 otherwise.
+ */
+static int nf_tables_check_loops(const struct nft_ctx *ctx,
+				 const struct nft_chain *chain)
+{
+	const struct nft_rule *rule;
+	const struct nft_expr *expr, *last;
+	const struct nft_set *set;
+	struct nft_set_binding *binding;
+	struct nft_set_iter iter;
+	int err;
+
+	if (chain == ctx->chain)
+		return -ELOOP;
+
+	list_for_each_entry(rule, &chain->rules, list) {
+		nft_rule_for_each_expr(expr, last, rule) {
+			const struct nft_data *data = NULL;
+
+			if (expr->ops->validate == NULL)
+				continue;
+
+			err = expr->ops->validate(ctx, expr, &data);
+			if (err < 0)
+				return err;
+
+			/* Only jump and goto verdicts lead to other chains */
+			if (data == NULL ||
+			    (data->verdict != NFT_JUMP &&
+			     data->verdict != NFT_GOTO))
+				continue;
+
+			err = nf_tables_check_loops(ctx, data->chain);
+			if (err < 0)
+				return err;
+		}
+	}
+
+	list_for_each_entry(set, &ctx->table->sets, list) {
+		/* Only verdict maps can contain jumps */
+		if (!(set->flags & NFT_SET_MAP))
+			continue;
+		if (set->dtype != NFT_DATA_VERDICT)
+			continue;
+
+		list_for_each_entry(binding, &set->bindings, list) {
+			if (binding->chain != chain)
+				continue;
+
+			iter.skip = 0;
+			iter.count = 0;
+			iter.err = 0;
+			iter.fn = nf_tables_loop_check_setelem;
+
+			set->ops->walk(ctx, set, &iter);
+			if (iter.err < 0)
+				return iter.err;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *	nft_validate_input_register - validate an expression's input register
+ *
+ *	@reg: the register number
+ *
+ *	An input register must lie above the verdict register and must not
+ *	exceed NFT_REG_MAX.  Returns 0 if @reg is acceptable, -EINVAL if it
+ *	is the verdict register or below and -ERANGE if it is beyond
+ *	NFT_REG_MAX.
+ */
+int nft_validate_input_register(enum nft_registers reg)
+{
+	if (reg <= NFT_REG_VERDICT)
+		return -EINVAL;
+	else if (reg > NFT_REG_MAX)
+		return -ERANGE;
+	else
+		return 0;
+}
+EXPORT_SYMBOL_GPL(nft_validate_input_register);
+
+/**
+ *	nft_validate_output_register - validate an expression's output register
+ *
+ *	@reg: the register number
+ *
+ *	An output register may be the verdict register or any register above
+ *	it up to NFT_REG_MAX.  Returns 0 if @reg is acceptable, -EINVAL if it
+ *	is below the verdict register and -ERANGE if it is beyond
+ *	NFT_REG_MAX.
+ */
+int nft_validate_output_register(enum nft_registers reg)
+{
+	if (reg < NFT_REG_VERDICT)
+		return -EINVAL;
+	else if (reg > NFT_REG_MAX)
+		return -ERANGE;
+	else
+		return 0;
+}
+EXPORT_SYMBOL_GPL(nft_validate_output_register);
+
+/**
+ *	nft_validate_data_load - validate an expression's data load
+ *
+ *	@ctx: context of the expression performing the load
+ *	@reg: the destination register number
+ *	@data: the data to load
+ *	@type: the data type
+ *
+ *	Validate that a data load uses the appropriate data type for
+ *	the destination register. A value of NULL for the data means
+ *	that it is runtime gathered data, which is always of type
+ *	NFT_DATA_VALUE.
+ *
+ *	Loads into the verdict register must carry verdict data.  Jump and
+ *	goto verdicts are additionally checked for loops, and the level of
+ *	the destination chain is raised to one above the level of @ctx's
+ *	chain when it is lower; -EMLINK is returned if that level would equal
+ *	NFT_JUMP_STACK_SIZE.
+ */
+int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg,
+			   const struct nft_data *data,
+			   enum nft_data_types type)
+{
+	int err;
+
+	if (reg != NFT_REG_VERDICT) {
+		/* Other registers only accept plain values */
+		if (data != NULL && type != NFT_DATA_VALUE)
+			return -EINVAL;
+		return 0;
+	}
+
+	if (data == NULL || type != NFT_DATA_VERDICT)
+		return -EINVAL;
+
+	if (data->verdict != NFT_GOTO && data->verdict != NFT_JUMP)
+		return 0;
+
+	err = nf_tables_check_loops(ctx, data->chain);
+	if (err < 0)
+		return err;
+
+	if (ctx->chain->level + 1 > data->chain->level) {
+		if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE)
+			return -EMLINK;
+		data->chain->level = ctx->chain->level + 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_validate_data_load);
+
+/*
+ * Netlink attribute policy for the attributes nested inside a verdict:
+ * a 32 bit verdict code and the name of the target chain, bounded by
+ * NFT_CHAIN_MAXNAMELEN - 1.
+ */
+static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
+ [NFTA_VERDICT_CODE] = { .type = NLA_U32 },
+ [NFTA_VERDICT_CHAIN] = { .type = NLA_STRING,
+ .len = NFT_CHAIN_MAXNAMELEN - 1 },
+};
+
+/*
+ * Parse a nested verdict attribute into @data and describe it in @desc.
+ *
+ * Plain verdicts only need the verdict code.  NFT_JUMP and NFT_GOTO also
+ * require the name of a chain in ctx->table; base chains are refused with
+ * -EOPNOTSUPP.  A successfully resolved target chain has its use counter
+ * incremented.  Returns 0 on success or a negative errno.
+ */
+static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
+			    struct nft_data_desc *desc, const struct nlattr *nla)
+{
+	struct nlattr *attrs[NFTA_VERDICT_MAX + 1];
+	struct nft_chain *target;
+	int ret;
+
+	ret = nla_parse_nested(attrs, NFTA_VERDICT_MAX, nla, nft_verdict_policy);
+	if (ret < 0)
+		return ret;
+
+	if (attrs[NFTA_VERDICT_CODE] == NULL)
+		return -EINVAL;
+	data->verdict = ntohl(nla_get_be32(attrs[NFTA_VERDICT_CODE]));
+
+	switch (data->verdict) {
+	case NFT_JUMP:
+	case NFT_GOTO:
+		if (attrs[NFTA_VERDICT_CHAIN] == NULL)
+			return -EINVAL;
+
+		target = nf_tables_chain_lookup(ctx->table,
+						attrs[NFTA_VERDICT_CHAIN]);
+		if (IS_ERR(target))
+			return PTR_ERR(target);
+		if (target->flags & NFT_BASE_CHAIN)
+			return -EOPNOTSUPP;
+
+		target->use++;
+		data->chain = target;
+		/*
+		 * NOTE(review): sizeof(data) is the size of the pointer, not
+		 * of the pointed-to structure - confirm this is the intended
+		 * length for jump/goto verdicts.
+		 */
+		desc->len = sizeof(data);
+		break;
+	case NF_ACCEPT:
+	case NF_DROP:
+	case NF_QUEUE:
+	case NFT_CONTINUE:
+	case NFT_BREAK:
+	case NFT_RETURN:
+		desc->len = sizeof(data->verdict);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	desc->type = NFT_DATA_VERDICT;
+	return 0;
+}
+
+/*
+ * Release a verdict: jump/goto verdicts drop the use count they hold on
+ * their target chain, all other verdicts need no cleanup.
+ */
+static void nft_verdict_uninit(const struct nft_data *data)
+{
+	if (data->verdict == NFT_JUMP || data->verdict == NFT_GOTO)
+		data->chain->use--;
+}
+
+/*
+ * Dump a verdict as a nested NFTA_DATA_VERDICT attribute: the verdict code
+ * and, for jump/goto verdicts, the name of the target chain.
+ * Returns 0 on success and -1 if the attributes could not be added.
+ */
+static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data)
+{
+	struct nlattr *start;
+
+	start = nla_nest_start(skb, NFTA_DATA_VERDICT);
+	if (start == NULL)
+		return -1;
+
+	if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict)))
+		return -1;
+
+	if (data->verdict == NFT_JUMP || data->verdict == NFT_GOTO) {
+		if (nla_put_string(skb, NFTA_VERDICT_CHAIN, data->chain->name))
+			return -1;
+	}
+
+	nla_nest_end(skb, start);
+	return 0;
+}
+
+/*
+ * Copy a plain value attribute into @data and record its type and length
+ * in @desc.  Empty attributes are rejected with -EINVAL, attributes larger
+ * than data->data with -EOVERFLOW.
+ */
+static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data,
+			  struct nft_data_desc *desc, const struct nlattr *nla)
+{
+	const unsigned int attrlen = nla_len(nla);
+
+	if (attrlen == 0)
+		return -EINVAL;
+	if (attrlen > sizeof(data->data))
+		return -EOVERFLOW;
+
+	nla_memcpy(data->data, nla, sizeof(data->data));
+
+	desc->len = attrlen;
+	desc->type = NFT_DATA_VALUE;
+	return 0;
+}
+
+/*
+ * Dump the first @len bytes of a plain value as an NFTA_DATA_VALUE
+ * attribute.  Returns the result of nla_put().
+ */
+static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data,
+ unsigned int len)
+{
+ return nla_put(skb, NFTA_DATA_VALUE, len, data->data);
+}
+
+/*
+ * Netlink attribute policy for nf_tables data: either a binary value no
+ * larger than the data member of struct nft_data, or a nested verdict.
+ */
+static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
+ [NFTA_DATA_VALUE] = { .type = NLA_BINARY,
+ .len = FIELD_SIZEOF(struct nft_data, data) },
+ [NFTA_DATA_VERDICT] = { .type = NLA_NESTED },
+};
+
+/**
+ *	nft_data_init - parse nf_tables data netlink attributes
+ *
+ *	@ctx: context of the expression using the data
+ *	@data: destination struct nft_data
+ *	@desc: data description
+ *	@nla: netlink attribute containing data
+ *
+ *	Parse the netlink data attributes and initialize a struct nft_data.
+ *	The type and length of data are returned in the data description.
+ *
+ *	The caller can indicate that it only wants to accept data of type
+ *	NFT_DATA_VALUE by passing NULL for the ctx argument.
+ *
+ *	A value attribute takes precedence over a verdict attribute.  Returns
+ *	0 on success, -EINVAL when no usable attribute is present, or the
+ *	error of the parser that was invoked.
+ */
+int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+		  struct nft_data_desc *desc, const struct nlattr *nla)
+{
+	struct nlattr *attrs[NFTA_DATA_MAX + 1];
+	int ret;
+
+	ret = nla_parse_nested(attrs, NFTA_DATA_MAX, nla, nft_data_policy);
+	if (ret < 0)
+		return ret;
+
+	if (attrs[NFTA_DATA_VALUE] != NULL)
+		return nft_value_init(ctx, data, desc, attrs[NFTA_DATA_VALUE]);
+
+	/* Verdicts are only accepted when the caller supplied a context. */
+	if (attrs[NFTA_DATA_VERDICT] == NULL || ctx == NULL)
+		return -EINVAL;
+
+	return nft_verdict_init(ctx, data, desc, attrs[NFTA_DATA_VERDICT]);
+}
+EXPORT_SYMBOL_GPL(nft_data_init);
+
+/**
+ *	nft_data_uninit - release a nft_data item
+ *
+ *	@data: struct nft_data to release
+ *	@type: type of data
+ *
+ *	Release a nft_data item.  NFT_DATA_VALUE types can be silently
+ *	discarded, all others need to be released by calling this function.
+ *	An unknown @type triggers a warning.
+ */
+void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
+{
+	if (type == NFT_DATA_VALUE)
+		return;
+
+	if (type == NFT_DATA_VERDICT) {
+		nft_verdict_uninit(data);
+		return;
+	}
+
+	WARN_ON(1);
+}
+EXPORT_SYMBOL_GPL(nft_data_uninit);
+
+/*
+ * Dump a struct nft_data nested inside attribute @attr.
+ *
+ * @type selects between the value dumper (which emits @len bytes) and the
+ * verdict dumper; an unknown type triggers a warning and yields -EINVAL.
+ * Returns -1 if the nest could not be opened, otherwise the result of the
+ * selected dumper.  The nest is closed regardless of that result.
+ */
+int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
+		  enum nft_data_types type, unsigned int len)
+{
+	struct nlattr *start;
+	int ret;
+
+	start = nla_nest_start(skb, attr);
+	if (start == NULL)
+		return -1;
+
+	if (type == NFT_DATA_VALUE) {
+		ret = nft_value_dump(skb, data, len);
+	} else if (type == NFT_DATA_VERDICT) {
+		ret = nft_verdict_dump(skb, data);
+	} else {
+		ret = -EINVAL;
+		WARN_ON(1);
+	}
+
+	nla_nest_end(skb, start);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nft_data_dump);
+
+/*
+ * Per network namespace initialisation: start out with empty af_info and
+ * commit lists.  Always returns 0.
+ */
+static int nf_tables_init_net(struct net *net)
+{
+ INIT_LIST_HEAD(&net->nft.af_info);
+ INIT_LIST_HEAD(&net->nft.commit_list);
+ return 0;
+}
+
+/* Per network namespace operations; only an init hook is provided. */
+static struct pernet_operations nf_tables_net_ops = {
+ .init = nf_tables_init_net,
+};
+
+/*
+ * Module initialisation.
+ *
+ * Allocates the info array (NFT_RULE_MAXEXPRS entries of struct
+ * nft_expr_info), initialises the nf_tables core, registers the nfnetlink
+ * subsystem and finally the per network namespace operations.
+ *
+ * Every step is unwound in reverse order when a later one fails, including
+ * a failure of register_pernet_subsys(): returning its error without
+ * unwinding would leave the nfnetlink subsystem and the core registered
+ * and leak the info array even though module loading failed.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __init nf_tables_module_init(void)
+{
+	int err;
+
+	info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS,
+		       GFP_KERNEL);
+	if (info == NULL) {
+		err = -ENOMEM;
+		goto err1;
+	}
+
+	err = nf_tables_core_module_init();
+	if (err < 0)
+		goto err2;
+
+	err = nfnetlink_subsys_register(&nf_tables_subsys);
+	if (err < 0)
+		goto err3;
+
+	pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n");
+
+	err = register_pernet_subsys(&nf_tables_net_ops);
+	if (err < 0)
+		goto err4;
+
+	return 0;
+err4:
+	nfnetlink_subsys_unregister(&nf_tables_subsys);
+err3:
+	nf_tables_core_module_exit();
+err2:
+	kfree(info);
+err1:
+	return err;
+}
+
+/*
+ * Module removal: undo the steps of nf_tables_module_init() in reverse
+ * order and free the info array.
+ */
+static void __exit nf_tables_module_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_net_ops);
+ nfnetlink_subsys_unregister(&nf_tables_subsys);
+ nf_tables_core_module_exit();
+ kfree(info);
+}
+
+/* Module entry/exit points and module metadata. */
+module_init(nf_tables_module_init);
+module_exit(nf_tables_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
new file mode 100644
index 000000000000..cb9e685caae1
--- /dev/null
+++ b/net/netfilter/nf_tables_core.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_log.h>
+
+/*
+ * Inlined equality comparison: compare the low priv->len bits of the first
+ * data word of the source register against the stored constant and set the
+ * verdict to NFT_BREAK on mismatch.
+ */
+static void nft_cmp_fast_eval(const struct nft_expr *expr,
+			      struct nft_data data[NFT_REG_MAX + 1])
+{
+	const struct nft_cmp_fast_expr *fast = nft_expr_priv(expr);
+	u32 lowbits;
+
+	lowbits = ~0U >> (sizeof(fast->data) * BITS_PER_BYTE - fast->len);
+	if ((data[fast->sreg].data[0] & lowbits) != fast->data)
+		data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/*
+ * Inlined payload load of 1, 2 or 4 bytes directly from the skb data,
+ * relative to the network header or to the network header plus
+ * pkt->xt.thoff.
+ *
+ * Returns true if the destination register was filled, false if the
+ * requested bytes do not lie before the skb tail pointer.
+ */
+static bool nft_payload_fast_eval(const struct nft_expr *expr,
+				  struct nft_data data[NFT_REG_MAX + 1],
+				  const struct nft_pktinfo *pkt)
+{
+	const struct nft_payload *payload = nft_expr_priv(expr);
+	const struct sk_buff *skb = pkt->skb;
+	struct nft_data *out = &data[payload->dreg];
+	unsigned char *src = skb_network_header(skb);
+
+	if (payload->base != NFT_PAYLOAD_NETWORK_HEADER)
+		src += pkt->xt.thoff;
+	src += payload->offset;
+
+	if (unlikely(src + payload->len >= skb_tail_pointer(skb)))
+		return false;
+
+	switch (payload->len) {
+	case 2:
+		*(u16 *)out->data = *(u16 *)src;
+		break;
+	case 4:
+		*(u32 *)out->data = *(u32 *)src;
+		break;
+	default:
+		*(u8 *)out->data = *(u8 *)src;
+		break;
+	}
+	return true;
+}
+
+/*
+ * One saved evaluation position: the chain, the rule and the rule number
+ * that were current when a jump was taken, so that evaluation can resume
+ * from there once the target chain is done.
+ */
+struct nft_jumpstack {
+ const struct nft_chain *chain;
+ const struct nft_rule *rule;
+ int rulenum;
+};
+
+/*
+ * Account one packet and its length in the per-cpu statistics of the base
+ * chain.  When the jump stack is in use the chain is taken from its first
+ * entry, otherwise @this is used.
+ */
+static inline void
+nft_chain_stats(const struct nft_chain *this, const struct nft_pktinfo *pkt,
+		struct nft_jumpstack *jumpstack, unsigned int stackptr)
+{
+	const struct nft_chain *base = this;
+	struct nft_stats __percpu *counters;
+
+	if (stackptr)
+		base = jumpstack[0].chain;
+
+	rcu_read_lock_bh();
+	counters = rcu_dereference(nft_base_chain(base)->stats);
+	__this_cpu_inc(counters->pkts);
+	__this_cpu_add(counters->bytes, pkt->skb->len);
+	rcu_read_unlock_bh();
+}
+
+/* Kind of event reported by nft_trace_packet(); indexes comments[]. */
+enum nft_trace {
+ NFT_TRACE_RULE,
+ NFT_TRACE_RETURN,
+ NFT_TRACE_POLICY,
+};
+
+/* Text printed in the trace log line for each enum nft_trace value. */
+static const char *const comments[] = {
+ [NFT_TRACE_RULE] = "rule",
+ [NFT_TRACE_RETURN] = "return",
+ [NFT_TRACE_POLICY] = "policy",
+};
+
+/* Logging parameters passed to nf_log_packet() for packet tracing. */
+static struct nf_loginfo trace_loginfo = {
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = 4,
+ .logflags = NF_LOG_MASK,
+ },
+ },
+};
+
+/*
+ * Log a trace line for the packet in the format
+ * "TRACE: <table>:<chain>:<event>:<rulenum> ".
+ * The network namespace is taken from the input device if there is one,
+ * otherwise from the output device.
+ */
+static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
+ const struct nft_chain *chain,
+ int rulenum, enum nft_trace type)
+{
+ struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
+
+ nf_log_packet(net, pkt->xt.family, pkt->hooknum, pkt->skb, pkt->in,
+ pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
+ chain->table->name, chain->name, comments[type],
+ rulenum);
+}
+
+/*
+ * Evaluate a packet against the chain stored in ops->priv.
+ *
+ * Rules are walked in list order and the expressions of each rule are
+ * evaluated until one of them changes the verdict register away from
+ * NFT_CONTINUE.  NFT_BREAK ends the current rule and moves on to the next
+ * one.  NFT_JUMP saves the current position on the jump stack and continues
+ * in the target chain, NFT_GOTO continues in the target chain without
+ * saving.  When a chain ends (or returns) evaluation resumes at the saved
+ * position, if any.
+ *
+ * Returns the NF_ACCEPT/NF_DROP/NF_QUEUE verdict issued by a rule, or the
+ * base chain policy when evaluation runs off the end with an empty jump
+ * stack.
+ */
+unsigned int
+nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
+{
+ const struct nft_chain *chain = ops->priv;
+ const struct nft_rule *rule;
+ const struct nft_expr *expr, *last;
+ struct nft_data data[NFT_REG_MAX + 1];
+ unsigned int stackptr = 0;
+ struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
+ int rulenum = 0;
+ /*
+ * Cache cursor to avoid problems in case that the cursor is updated
+ * while traversing the ruleset.
+ */
+ unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor);
+
+do_chain:
+ /* Start from the list head so that the walk begins at the first rule. */
+ rule = list_entry(&chain->rules, struct nft_rule, list);
+next_rule:
+ /* Also entered when returning from a jump, with rule = the saved rule. */
+ data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ list_for_each_entry_continue_rcu(rule, &chain->rules, list) {
+
+ /* This rule is not active, skip. */
+ if (unlikely(rule->genmask & (1 << gencursor)))
+ continue;
+
+ rulenum++;
+
+ nft_rule_for_each_expr(expr, last, rule) {
+ /*
+ * Fast paths: cmp_fast is evaluated inline; payload_fast falls
+ * back to the regular eval callback when the inline load fails.
+ */
+ if (expr->ops == &nft_cmp_fast_ops)
+ nft_cmp_fast_eval(expr, data);
+ else if (expr->ops != &nft_payload_fast_ops ||
+ !nft_payload_fast_eval(expr, data, pkt))
+ expr->ops->eval(expr, data, pkt);
+
+ if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE)
+ break;
+ }
+
+ switch (data[NFT_REG_VERDICT].verdict) {
+ case NFT_BREAK:
+ data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ /* fall through */
+ case NFT_CONTINUE:
+ continue;
+ }
+ /* Any other verdict ends the rule walk of this chain. */
+ break;
+ }
+
+ switch (data[NFT_REG_VERDICT].verdict) {
+ case NF_ACCEPT:
+ case NF_DROP:
+ case NF_QUEUE:
+ if (unlikely(pkt->skb->nf_trace))
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+
+ return data[NFT_REG_VERDICT].verdict;
+ case NFT_JUMP:
+ if (unlikely(pkt->skb->nf_trace))
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+
+ /* Remember where to resume once the target chain is done. */
+ BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
+ jumpstack[stackptr].chain = chain;
+ jumpstack[stackptr].rule = rule;
+ jumpstack[stackptr].rulenum = rulenum;
+ stackptr++;
+ /* fall through */
+ case NFT_GOTO:
+ chain = data[NFT_REG_VERDICT].chain;
+ goto do_chain;
+ case NFT_RETURN:
+ if (unlikely(pkt->skb->nf_trace))
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
+
+ /* fall through */
+ case NFT_CONTINUE:
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ /* End of chain or explicit return: pop the jump stack if possible. */
+ if (stackptr > 0) {
+ if (unlikely(pkt->skb->nf_trace))
+ nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN);
+
+ stackptr--;
+ chain = jumpstack[stackptr].chain;
+ rule = jumpstack[stackptr].rule;
+ rulenum = jumpstack[stackptr].rulenum;
+ goto next_rule;
+ }
+ /* Jump stack is empty: account the packet and apply the policy. */
+ nft_chain_stats(chain, pkt, jumpstack, stackptr);
+
+ if (unlikely(pkt->skb->nf_trace))
+ nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_POLICY);
+
+ return nft_base_chain(chain)->policy;
+}
+EXPORT_SYMBOL_GPL(nft_do_chain_pktinfo);
+
+/*
+ * Initialise the built-in expression modules (immediate, cmp, lookup,
+ * bitwise, byteorder, payload) in that order.  On failure the modules
+ * initialised so far are shut down in reverse order and the error is
+ * returned.
+ */
+int __init nf_tables_core_module_init(void)
+{
+	int ret;
+
+	ret = nft_immediate_module_init();
+	if (ret < 0)
+		goto out;
+
+	ret = nft_cmp_module_init();
+	if (ret < 0)
+		goto undo_immediate;
+
+	ret = nft_lookup_module_init();
+	if (ret < 0)
+		goto undo_cmp;
+
+	ret = nft_bitwise_module_init();
+	if (ret < 0)
+		goto undo_lookup;
+
+	ret = nft_byteorder_module_init();
+	if (ret < 0)
+		goto undo_bitwise;
+
+	ret = nft_payload_module_init();
+	if (ret < 0)
+		goto undo_byteorder;
+
+	return 0;
+
+undo_byteorder:
+	nft_byteorder_module_exit();
+undo_bitwise:
+	nft_bitwise_module_exit();
+undo_lookup:
+	nft_lookup_module_exit();
+undo_cmp:
+	nft_cmp_module_exit();
+undo_immediate:
+	nft_immediate_module_exit();
+out:
+	return ret;
+}
+
+/*
+ * Shut down the built-in expression modules in the reverse order of
+ * nf_tables_core_module_init().
+ */
+void nf_tables_core_module_exit(void)
+{
+ nft_payload_module_exit();
+ nft_byteorder_module_exit();
+ nft_bitwise_module_exit();
+ nft_lookup_module_exit();
+ nft_cmp_module_exit();
+ nft_immediate_module_exit();
+}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 572d87dc116f..027f16af51a0 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -147,9 +147,6 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
const struct nfnetlink_subsystem *ss;
int type, err;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
-
/* All the messages must at least contain nfgenmsg */
if (nlmsg_len(nlh) < sizeof(struct nfgenmsg))
return 0;
@@ -217,9 +214,179 @@ replay:
}
}
+/*
+ * Process a batch of nfnetlink messages for a single subsystem.
+ *
+ * @skb:       received buffer, already advanced past the batch begin message
+ * @nlh:       netlink header used for acknowledging early errors
+ * @subsys_id: subsystem that all messages of the batch must belong to
+ *
+ * The messages are handled on a clone of the original buffer so that the
+ * whole batch can be replayed from the start when a callback returns
+ * -EAGAIN.  Errors of individual messages are acked but do not stop
+ * processing.  The subsystem's commit() is called only if
+ * NFNL_MSG_BATCH_END was seen and no message failed, otherwise abort() is
+ * called.
+ *
+ * Early errors that are reported after the clone has been freed must be
+ * acked on the original buffer (oskb): skb aliases the clone at that point
+ * and must not be touched any more.
+ */
+static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
+				u_int16_t subsys_id)
+{
+	struct sk_buff *nskb, *oskb = skb;
+	struct net *net = sock_net(skb->sk);
+	const struct nfnetlink_subsystem *ss;
+	const struct nfnl_callback *nc;
+	bool success = true, done = false;
+	int err;
+
+	if (subsys_id >= NFNL_SUBSYS_COUNT)
+		return netlink_ack(skb, nlh, -EINVAL);
+replay:
+	nskb = netlink_skb_clone(oskb, GFP_KERNEL);
+	if (!nskb)
+		return netlink_ack(oskb, nlh, -ENOMEM);
+
+	nskb->sk = oskb->sk;
+	skb = nskb;
+
+	nfnl_lock(subsys_id);
+	ss = rcu_dereference_protected(table[subsys_id].subsys,
+				       lockdep_is_held(&table[subsys_id].mutex));
+	if (!ss) {
+#ifdef CONFIG_MODULES
+		nfnl_unlock(subsys_id);
+		request_module("nfnetlink-subsys-%d", subsys_id);
+		nfnl_lock(subsys_id);
+		ss = rcu_dereference_protected(table[subsys_id].subsys,
+					       lockdep_is_held(&table[subsys_id].mutex));
+		if (!ss)
+#endif
+		{
+			nfnl_unlock(subsys_id);
+			kfree_skb(nskb);
+			/* skb == nskb has just been freed, ack on oskb. */
+			return netlink_ack(oskb, nlh, -EOPNOTSUPP);
+		}
+	}
+
+	/* Batching needs both transaction hooks. */
+	if (!ss->commit || !ss->abort) {
+		nfnl_unlock(subsys_id);
+		kfree_skb(nskb);
+		/* skb == nskb has just been freed, ack on oskb. */
+		return netlink_ack(oskb, nlh, -EOPNOTSUPP);
+	}
+
+	while (skb->len >= nlmsg_total_size(0)) {
+		int msglen, type;
+
+		nlh = nlmsg_hdr(skb);
+		err = 0;
+
+		if (nlh->nlmsg_len < NLMSG_HDRLEN) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		/* Only requests are handled by the kernel */
+		if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		type = nlh->nlmsg_type;
+		if (type == NFNL_MSG_BATCH_BEGIN) {
+			/* Malformed: Batch begin twice */
+			success = false;
+			goto done;
+		} else if (type == NFNL_MSG_BATCH_END) {
+			done = true;
+			goto done;
+		} else if (type < NLMSG_MIN_TYPE) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		/* We only accept a batch with messages for the same
+		 * subsystem.
+		 */
+		if (NFNL_SUBSYS_ID(type) != subsys_id) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		nc = nfnetlink_find_client(type, ss);
+		if (!nc) {
+			err = -EINVAL;
+			goto ack;
+		}
+
+		{
+			int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
+			u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
+			struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
+			struct nlattr *attr = (void *)nlh + min_len;
+			int attrlen = nlh->nlmsg_len - min_len;
+
+			err = nla_parse(cda, ss->cb[cb_id].attr_count,
+					attr, attrlen, ss->cb[cb_id].policy);
+			if (err < 0)
+				goto ack;
+
+			if (nc->call_batch) {
+				err = nc->call_batch(net->nfnl, skb, nlh,
+						     (const struct nlattr **)cda);
+			}
+
+			/* The lock was released to autoload some module, we
+			 * have to abort and start from scratch using the
+			 * original skb.
+			 */
+			if (err == -EAGAIN) {
+				ss->abort(skb);
+				nfnl_unlock(subsys_id);
+				kfree_skb(nskb);
+				goto replay;
+			}
+		}
+ack:
+		if (nlh->nlmsg_flags & NLM_F_ACK || err) {
+			/* We don't stop processing the batch on errors, thus,
+			 * userspace gets all the errors that the batch
+			 * triggers.
+			 */
+			netlink_ack(skb, nlh, err);
+			if (err)
+				success = false;
+		}
+
+		/* Advance to the next message, clamped to what is left. */
+		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (msglen > skb->len)
+			msglen = skb->len;
+		skb_pull(skb, msglen);
+	}
+done:
+	if (success && done)
+		ss->commit(skb);
+	else
+		ss->abort(skb);
+
+	nfnl_unlock(subsys_id);
+	kfree_skb(nskb);
+}
+
static void nfnetlink_rcv(struct sk_buff *skb)
{
- netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
+ /*
+ * Entry point for received nfnetlink buffers: requires CAP_NET_ADMIN in
+ * the socket's user namespace, then hands batches (first message is
+ * NFNL_MSG_BATCH_BEGIN) to nfnetlink_rcv_batch() and everything else to
+ * the regular per-message path.
+ */
+ struct nlmsghdr *nlh = nlmsg_hdr(skb);
+ struct net *net = sock_net(skb->sk);
+ int msglen;
+
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return netlink_ack(skb, nlh, -EPERM);
+
+ /* Silently ignore buffers whose first message is truncated. */
+ if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+ skb->len < nlh->nlmsg_len)
+ return;
+
+ if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) {
+ struct nfgenmsg *nfgenmsg;
+
+ msglen = NLMSG_ALIGN(nlh->nlmsg_len);
+ if (msglen > skb->len)
+ msglen = skb->len;
+
+ /* The batch begin message must carry a struct nfgenmsg. */
+ if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+ skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg))
+ return;
+
+ /* res_id of the begin message selects the subsystem. */
+ nfgenmsg = nlmsg_data(nlh);
+ skb_pull(skb, msglen);
+ nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id);
+ } else {
+ netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
+ }
}
#ifdef CONFIG_MODULES
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
new file mode 100644
index 000000000000..4fb6ee2c1106
--- /dev/null
+++ b/net/netfilter/nft_bitwise.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * Private data of the bitwise expression: source and destination register,
+ * operand length in bytes and the mask/xor operands.
+ */
+struct nft_bitwise {
+ enum nft_registers sreg:8;
+ enum nft_registers dreg:8;
+ u8 len;
+ struct nft_data mask;
+ struct nft_data xor;
+};
+
+/*
+ * Compute dreg = (sreg & mask) ^ xor, one data word at a time, for as many
+ * words as are needed to cover priv->len bytes.
+ */
+static void nft_bitwise_eval(const struct nft_expr *expr,
+			     struct nft_data data[NFT_REG_MAX + 1],
+			     const struct nft_pktinfo *pkt)
+{
+	const struct nft_bitwise *bw = nft_expr_priv(expr);
+	const struct nft_data *in = &data[bw->sreg];
+	struct nft_data *out = &data[bw->dreg];
+	const unsigned int words = DIV_ROUND_UP(bw->len, 4);
+	unsigned int w;
+
+	for (w = 0; w < words; w++)
+		out->data[w] = (in->data[w] & bw->mask.data[w]) ^
+			       bw->xor.data[w];
+}
+
+/* Netlink attribute policy of the bitwise expression. */
+static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
+ [NFTA_BITWISE_SREG] = { .type = NLA_U32 },
+ [NFTA_BITWISE_DREG] = { .type = NLA_U32 },
+ [NFTA_BITWISE_LEN] = { .type = NLA_U32 },
+ [NFTA_BITWISE_MASK] = { .type = NLA_NESTED },
+ [NFTA_BITWISE_XOR] = { .type = NLA_NESTED },
+};
+
+/*
+ * Initialise a bitwise expression from its netlink attributes.
+ *
+ * All five attributes are mandatory.  The registers are validated, and the
+ * mask and xor operands must be plain values whose length equals the LEN
+ * attribute.  Returns 0 on success or a negative errno.
+ */
+static int nft_bitwise_init(const struct nft_ctx *ctx,
+			    const struct nft_expr *expr,
+			    const struct nlattr * const tb[])
+{
+	struct nft_bitwise *bw = nft_expr_priv(expr);
+	struct nft_data_desc mask_desc, xor_desc;
+	int ret;
+
+	if (!tb[NFTA_BITWISE_SREG] || !tb[NFTA_BITWISE_DREG] ||
+	    !tb[NFTA_BITWISE_LEN] || !tb[NFTA_BITWISE_MASK] ||
+	    !tb[NFTA_BITWISE_XOR])
+		return -EINVAL;
+
+	bw->sreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_SREG]));
+	ret = nft_validate_input_register(bw->sreg);
+	if (ret < 0)
+		return ret;
+
+	bw->dreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_DREG]));
+	ret = nft_validate_output_register(bw->dreg);
+	if (ret < 0)
+		return ret;
+
+	ret = nft_validate_data_load(ctx, bw->dreg, NULL, NFT_DATA_VALUE);
+	if (ret < 0)
+		return ret;
+
+	bw->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN]));
+
+	/* A NULL context restricts the operands to plain values. */
+	ret = nft_data_init(NULL, &bw->mask, &mask_desc, tb[NFTA_BITWISE_MASK]);
+	if (ret < 0)
+		return ret;
+	if (mask_desc.len != bw->len)
+		return -EINVAL;
+
+	ret = nft_data_init(NULL, &bw->xor, &xor_desc, tb[NFTA_BITWISE_XOR]);
+	if (ret < 0)
+		return ret;
+	if (xor_desc.len != bw->len)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Dump the configuration of a bitwise expression.  Returns 0 on success and
+ * -1 if an attribute could not be added to the message.
+ */
+static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_bitwise *bw = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_BITWISE_SREG, htonl(bw->sreg)) ||
+	    nla_put_be32(skb, NFTA_BITWISE_DREG, htonl(bw->dreg)) ||
+	    nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(bw->len)))
+		return -1;
+
+	if (nft_data_dump(skb, NFTA_BITWISE_MASK, &bw->mask,
+			  NFT_DATA_VALUE, bw->len) < 0)
+		return -1;
+
+	if (nft_data_dump(skb, NFTA_BITWISE_XOR, &bw->xor,
+			  NFT_DATA_VALUE, bw->len) < 0)
+		return -1;
+
+	return 0;
+}
+
+/* Forward declaration: the ops and the type reference each other. */
+static struct nft_expr_type nft_bitwise_type;
+/* Operations of the bitwise expression. */
+static const struct nft_expr_ops nft_bitwise_ops = {
+ .type = &nft_bitwise_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)),
+ .eval = nft_bitwise_eval,
+ .init = nft_bitwise_init,
+ .dump = nft_bitwise_dump,
+};
+
+/* Expression type registered under the name "bitwise". */
+static struct nft_expr_type nft_bitwise_type __read_mostly = {
+ .name = "bitwise",
+ .ops = &nft_bitwise_ops,
+ .policy = nft_bitwise_policy,
+ .maxattr = NFTA_BITWISE_MAX,
+ .owner = THIS_MODULE,
+};
+
+/* Register the bitwise expression type; returns nft_register_expr()'s result. */
+int __init nft_bitwise_module_init(void)
+{
+ return nft_register_expr(&nft_bitwise_type);
+}
+
+/* Unregister the bitwise expression type. */
+void nft_bitwise_module_exit(void)
+{
+ nft_unregister_expr(&nft_bitwise_type);
+}
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
new file mode 100644
index 000000000000..c39ed8d29df1
--- /dev/null
+++ b/net/netfilter/nft_byteorder.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * Private data of the byteorder expression: source and destination
+ * register, conversion direction, total length in bytes and the size in
+ * bytes of the elements to convert (2 or 4, enforced at init time).
+ */
+struct nft_byteorder {
+ enum nft_registers sreg:8;
+ enum nft_registers dreg:8;
+ enum nft_byteorder_ops op:8;
+ u8 len;
+ u8 size;
+};
+
+/*
+ * Convert priv->len / priv->size elements from the source register to the
+ * destination register, either network-to-host or host-to-network,
+ * depending on priv->op.
+ */
+static void nft_byteorder_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_byteorder *priv = nft_expr_priv(expr);
+ struct nft_data *src = &data[priv->sreg], *dst = &data[priv->dreg];
+ /*
+ * NOTE(review): s[i]/d[i] step by the size of this union rather than by
+ * priv->size, so for size 2 the elements appear to be taken from the
+ * start of each union-sized slot - confirm this is intended.
+ */
+ union { u32 u32; u16 u16; } *s, *d;
+ unsigned int i;
+
+ s = (void *)src->data;
+ d = (void *)dst->data;
+
+ switch (priv->size) {
+ case 4:
+ switch (priv->op) {
+ case NFT_BYTEORDER_NTOH:
+ for (i = 0; i < priv->len / 4; i++)
+ d[i].u32 = ntohl((__force __be32)s[i].u32);
+ break;
+ case NFT_BYTEORDER_HTON:
+ for (i = 0; i < priv->len / 4; i++)
+ d[i].u32 = (__force __u32)htonl(s[i].u32);
+ break;
+ }
+ break;
+ case 2:
+ switch (priv->op) {
+ case NFT_BYTEORDER_NTOH:
+ for (i = 0; i < priv->len / 2; i++)
+ d[i].u16 = ntohs((__force __be16)s[i].u16);
+ break;
+ case NFT_BYTEORDER_HTON:
+ for (i = 0; i < priv->len / 2; i++)
+ d[i].u16 = (__force __u16)htons(s[i].u16);
+ break;
+ }
+ break;
+ }
+}
+
+/* Netlink attribute policy of the byteorder expression. */
+static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = {
+ [NFTA_BYTEORDER_SREG] = { .type = NLA_U32 },
+ [NFTA_BYTEORDER_DREG] = { .type = NLA_U32 },
+ [NFTA_BYTEORDER_OP] = { .type = NLA_U32 },
+ [NFTA_BYTEORDER_LEN] = { .type = NLA_U32 },
+ [NFTA_BYTEORDER_SIZE] = { .type = NLA_U32 },
+};
+
+/*
+ * Initialise a byteorder expression from its netlink attributes.
+ *
+ * All five attributes are mandatory.  The registers are validated, the
+ * operation must be NTOH or HTON, the length must be non-zero and fit into
+ * the data member of struct nft_data, and the element size must be 2 or 4.
+ * Returns 0 on success or a negative errno.
+ */
+static int nft_byteorder_init(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nlattr * const tb[])
+{
+	struct nft_byteorder *bo = nft_expr_priv(expr);
+	int ret;
+
+	if (!tb[NFTA_BYTEORDER_SREG] || !tb[NFTA_BYTEORDER_DREG] ||
+	    !tb[NFTA_BYTEORDER_LEN] || !tb[NFTA_BYTEORDER_SIZE] ||
+	    !tb[NFTA_BYTEORDER_OP])
+		return -EINVAL;
+
+	bo->sreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SREG]));
+	ret = nft_validate_input_register(bo->sreg);
+	if (ret < 0)
+		return ret;
+
+	bo->dreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_DREG]));
+	ret = nft_validate_output_register(bo->dreg);
+	if (ret < 0)
+		return ret;
+
+	ret = nft_validate_data_load(ctx, bo->dreg, NULL, NFT_DATA_VALUE);
+	if (ret < 0)
+		return ret;
+
+	bo->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP]));
+	if (bo->op != NFT_BYTEORDER_NTOH && bo->op != NFT_BYTEORDER_HTON)
+		return -EINVAL;
+
+	bo->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN]));
+	if (bo->len == 0 || bo->len > FIELD_SIZEOF(struct nft_data, data))
+		return -EINVAL;
+
+	bo->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE]));
+	if (bo->size != 2 && bo->size != 4)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Dump the configuration of a byteorder expression.  Returns 0 on success
+ * and -1 if an attribute could not be added to the message.
+ */
+static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_byteorder *bo = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_BYTEORDER_SREG, htonl(bo->sreg)) ||
+	    nla_put_be32(skb, NFTA_BYTEORDER_DREG, htonl(bo->dreg)) ||
+	    nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(bo->op)) ||
+	    nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(bo->len)) ||
+	    nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(bo->size)))
+		return -1;
+
+	return 0;
+}
+
+/* Forward declaration: the ops and the type reference each other. */
+static struct nft_expr_type nft_byteorder_type;
+/* Operations of the byteorder expression. */
+static const struct nft_expr_ops nft_byteorder_ops = {
+ .type = &nft_byteorder_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)),
+ .eval = nft_byteorder_eval,
+ .init = nft_byteorder_init,
+ .dump = nft_byteorder_dump,
+};
+
+/* Expression type registered under the name "byteorder". */
+static struct nft_expr_type nft_byteorder_type __read_mostly = {
+ .name = "byteorder",
+ .ops = &nft_byteorder_ops,
+ .policy = nft_byteorder_policy,
+ .maxattr = NFTA_BYTEORDER_MAX,
+ .owner = THIS_MODULE,
+};
+
+/* Register the byteorder expression type; returns nft_register_expr()'s result. */
+int __init nft_byteorder_module_init(void)
+{
+ return nft_register_expr(&nft_byteorder_type);
+}
+
+/* Unregister the byteorder expression type. */
+void nft_byteorder_module_exit(void)
+{
+ nft_unregister_expr(&nft_byteorder_type);
+}
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
new file mode 100644
index 000000000000..954925db414d
--- /dev/null
+++ b/net/netfilter/nft_cmp.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * Private data of the generic cmp expression: the constant to compare
+ * against, the source register, the comparison length and the operator.
+ */
+struct nft_cmp_expr {
+ struct nft_data data;
+ enum nft_registers sreg:8;
+ u8 len;
+ enum nft_cmp_ops op:8;
+};
+
+/*
+ * Compare the source register against the stored constant using
+ * nft_data_cmp() and set the verdict to NFT_BREAK when the configured
+ * relation does not hold.
+ */
+static void nft_cmp_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_cmp_expr *priv = nft_expr_priv(expr);
+ int d;
+
+ d = nft_data_cmp(&data[priv->sreg], &priv->data, priv->len);
+ switch (priv->op) {
+ case NFT_CMP_EQ:
+ if (d != 0)
+ goto mismatch;
+ break;
+ case NFT_CMP_NEQ:
+ if (d == 0)
+ goto mismatch;
+ break;
+ case NFT_CMP_LT:
+ if (d == 0)
+ goto mismatch;
+ /* fall through: "less than" is "less or equal" minus equality */
+ case NFT_CMP_LTE:
+ if (d > 0)
+ goto mismatch;
+ break;
+ case NFT_CMP_GT:
+ if (d == 0)
+ goto mismatch;
+ /* fall through: "greater than" is "greater or equal" minus equality */
+ case NFT_CMP_GTE:
+ if (d < 0)
+ goto mismatch;
+ break;
+ }
+ return;
+
+mismatch:
+ data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/* Netlink attribute policy of the cmp expression. */
+static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = {
+ [NFTA_CMP_SREG] = { .type = NLA_U32 },
+ [NFTA_CMP_OP] = { .type = NLA_U32 },
+ [NFTA_CMP_DATA] = { .type = NLA_NESTED },
+};
+
+/*
+ * Initialise a generic cmp expression from its netlink attributes.
+ *
+ * No validation is done here and a failure to parse the data is treated as
+ * a bug.  NOTE(review): this presumably relies on nft_cmp_select_ops()
+ * having validated the same attributes beforehand - confirm against the
+ * caller.
+ */
+static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_cmp_expr *priv = nft_expr_priv(expr);
+ struct nft_data_desc desc;
+ int err;
+
+ priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+ priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
+
+ err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
+ BUG_ON(err < 0);
+
+ priv->len = desc.len;
+ return 0;
+}
+
+/*
+ * Dump the configuration of a generic cmp expression.  Returns 0 on success
+ * and -1 if an attribute could not be added to the message.
+ */
+static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_cmp_expr *cmp = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(cmp->sreg)) ||
+	    nla_put_be32(skb, NFTA_CMP_OP, htonl(cmp->op)))
+		return -1;
+
+	if (nft_data_dump(skb, NFTA_CMP_DATA, &cmp->data,
+			  NFT_DATA_VALUE, cmp->len) < 0)
+		return -1;
+
+	return 0;
+}
+
+/* Forward declaration: the ops and the type reference each other. */
+static struct nft_expr_type nft_cmp_type;
+/* Operations of the generic cmp expression. */
+static const struct nft_expr_ops nft_cmp_ops = {
+ .type = &nft_cmp_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)),
+ .eval = nft_cmp_eval,
+ .init = nft_cmp_init,
+ .dump = nft_cmp_dump,
+};
+
+/*
+ * Initialise a fast cmp expression: store the source register, the length
+ * of the constant in bits and the constant itself, masked down to that many
+ * low bits of its first data word.  A failure to parse the data is treated
+ * as a bug.
+ */
+static int nft_cmp_fast_init(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr,
+			     const struct nlattr * const tb[])
+{
+	struct nft_cmp_fast_expr *fast = nft_expr_priv(expr);
+	struct nft_data_desc desc;
+	struct nft_data value;
+	u32 lowbits;
+	int ret;
+
+	fast->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+
+	ret = nft_data_init(NULL, &value, &desc, tb[NFTA_CMP_DATA]);
+	BUG_ON(ret < 0);
+
+	/* From here on the length is expressed in bits. */
+	desc.len *= BITS_PER_BYTE;
+	lowbits = ~0U >> (sizeof(fast->data) * BITS_PER_BYTE - desc.len);
+
+	fast->data = value.data[0] & lowbits;
+	fast->len = desc.len;
+	return 0;
+}
+
+/*
+ * Dump a fast cmp expression in the same format as the generic one: the
+ * operator is always NFT_CMP_EQ and the length is converted back from bits
+ * to bytes.  Returns 0 on success and -1 if an attribute could not be
+ * added to the message.
+ */
+static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_cmp_fast_expr *fast = nft_expr_priv(expr);
+	struct nft_data value;
+
+	if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(fast->sreg)) ||
+	    nla_put_be32(skb, NFTA_CMP_OP, htonl(NFT_CMP_EQ)))
+		return -1;
+
+	value.data[0] = fast->data;
+	if (nft_data_dump(skb, NFTA_CMP_DATA, &value,
+			  NFT_DATA_VALUE, fast->len / BITS_PER_BYTE) < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Operations of the fast cmp expression.  There is no eval callback: the
+ * comparison is evaluated inline wherever the ops pointer is recognised.
+ */
+const struct nft_expr_ops nft_cmp_fast_ops = {
+ .type = &nft_cmp_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_fast_expr)),
+ .eval = NULL, /* inlined */
+ .init = nft_cmp_fast_init,
+ .dump = nft_cmp_fast_dump,
+};
+
+/*
+ * Validate the cmp attributes and pick the implementation to use.
+ *
+ * All three attributes are mandatory, the source register must be a valid
+ * input register, the operator must be known and the data must parse as a
+ * plain value.  Equality comparisons of at most sizeof(u32) bytes use the
+ * fast ops, everything else the generic ops.
+ *
+ * Returns the selected ops or an ERR_PTR() encoded errno.
+ */
+static const struct nft_expr_ops *
+nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+{
+	struct nft_data_desc desc;
+	struct nft_data value;
+	enum nft_registers sreg;
+	enum nft_cmp_ops op;
+	int ret;
+
+	if (!tb[NFTA_CMP_SREG] || !tb[NFTA_CMP_OP] || !tb[NFTA_CMP_DATA])
+		return ERR_PTR(-EINVAL);
+
+	sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
+	ret = nft_validate_input_register(sreg);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
+	switch (op) {
+	case NFT_CMP_EQ:
+	case NFT_CMP_NEQ:
+	case NFT_CMP_LT:
+	case NFT_CMP_LTE:
+	case NFT_CMP_GT:
+	case NFT_CMP_GTE:
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	ret = nft_data_init(NULL, &value, &desc, tb[NFTA_CMP_DATA]);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	if (op == NFT_CMP_EQ && desc.len <= sizeof(u32))
+		return &nft_cmp_fast_ops;
+
+	return &nft_cmp_ops;
+}
+
+/*
+ * Expression type registered under the name "cmp".  The ops are chosen per
+ * expression by nft_cmp_select_ops().
+ */
+static struct nft_expr_type nft_cmp_type __read_mostly = {
+ .name = "cmp",
+ .select_ops = nft_cmp_select_ops,
+ .policy = nft_cmp_policy,
+ .maxattr = NFTA_CMP_MAX,
+ .owner = THIS_MODULE,
+};
+
+/* Register the cmp expression type; returns nft_register_expr()'s result. */
+int __init nft_cmp_module_init(void)
+{
+ return nft_register_expr(&nft_cmp_type);
+}
+
+/* Unregister the cmp expression type. */
+void nft_cmp_module_exit(void)
+{
+ nft_unregister_expr(&nft_cmp_type);
+}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
new file mode 100644
index 000000000000..4811f762e060
--- /dev/null
+++ b/net/netfilter/nft_compat.c
@@ -0,0 +1,768 @@
+/*
+ * (C) 2012-2013 by Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This software has been sponsored by Sophos Astaro <http://www.sophos.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter/nf_tables_compat.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <asm/uaccess.h> /* for set_fs */
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * Scratch entry handed to the x_tables checkentry hooks as entryinfo. Only
+ * the protocol and inversion flags of the member matching the address family
+ * are filled in by the *_set_*chk_param() helpers in this file.
+ */
+union nft_entry {
+ struct ipt_entry e4;
+ struct ip6t_entry e6;
+};
+
+/*
+ * Prepare the per-packet xt_action_param for one extension call: clear
+ * hotdrop and point target/targinfo at the extension and its private data.
+ * The same helper is used for matches and for targets.
+ */
+static inline void
+nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info)
+{
+	par->hotdrop	= false;
+	par->targinfo	= xt_info;
+	par->target	= xt;
+}
+
+/*
+ * Run the wrapped x_tables target on the packet and translate its result
+ * into an nf_tables verdict: XT_CONTINUE becomes NFT_CONTINUE, any other
+ * value is stored unchanged, and a set hotdrop flag forces NF_DROP.
+ */
+static void nft_target_eval(const struct nft_expr *expr,
+			    struct nft_data data[NFT_REG_MAX + 1],
+			    const struct nft_pktinfo *pkt)
+{
+	struct xt_target *target = expr->ops->data;
+	void *info = nft_expr_priv(expr);
+	struct sk_buff *skb = pkt->skb;
+	int verdict;
+
+	nft_compat_set_par((struct xt_action_param *)&pkt->xt, target, info);
+
+	verdict = target->target(skb, &pkt->xt);
+	if (pkt->xt.hotdrop)
+		verdict = NF_DROP;
+
+	if (verdict == XT_CONTINUE)
+		data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+	else
+		data[NFT_REG_VERDICT].verdict = verdict;
+}
+
+/* Netlink attribute policy for the "target" expression. */
+static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
+	[NFTA_TARGET_INFO]	= { .type = NLA_BINARY },
+	[NFTA_TARGET_REV]	= { .type = NLA_U32 },
+	[NFTA_TARGET_NAME]	= { .type = NLA_NUL_STRING },
+};
+
+/*
+ * Fill the xt_tgchk_param that is passed to xt_check_target().
+ *
+ * @par:    parameter block to fill in
+ * @ctx:    rule context; supplies table name, address family and chain
+ * @target: x_tables target being wrapped
+ * @info:   target private data (already copied from the netlink attribute)
+ * @entry:  scratch ipt/ip6t entry carrying protocol and inversion flag
+ * @proto:  layer 4 protocol taken from NFTA_RULE_COMPAT, 0 if none
+ * @inv:    true when the protocol match is inverted
+ */
+static void
+nft_target_set_tgchk_param(struct xt_tgchk_param *par,
+			   const struct nft_ctx *ctx,
+			   struct xt_target *target, void *info,
+			   union nft_entry *entry, u8 proto, bool inv)
+{
+	par->net	= &init_net;
+	par->table	= ctx->table->name;
+	switch (ctx->afi->family) {
+	case AF_INET:
+		entry->e4.ip.proto = proto;
+		entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
+		break;
+	case AF_INET6:
+		entry->e6.ipv6.proto = proto;
+		entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
+		break;
+	default:
+		break;
+	}
+	par->entryinfo	= entry;
+	par->target	= target;
+	par->targinfo	= info;
+
+	/*
+	 * nft_target_init() passes an uninitialised on-stack structure, so
+	 * hook_mask has to be given a value on every path. A non-base chain
+	 * is not bound to a hook, hence the empty mask.
+	 */
+	par->hook_mask	= 0;
+	if (ctx->chain->flags & NFT_BASE_CHAIN) {
+		const struct nft_base_chain *basechain =
+						nft_base_chain(ctx->chain);
+		const struct nf_hook_ops *ops = &basechain->ops;
+
+		par->hook_mask = 1 << ops->hooknum;
+	}
+	par->family	= ctx->afi->family;
+}
+
+/*
+ * Copy target private data from the netlink attribute payload (in) into the
+ * expression private area (out).
+ *
+ * With CONFIG_COMPAT and a target that provides compat_from_user(), that
+ * hook does the conversion and the bytes between targetsize and
+ * XT_ALIGN(targetsize) are zeroed afterwards. Otherwise XT_ALIGN(targetsize)
+ * bytes are copied verbatim.
+ *
+ * NOTE(review): nothing here checks that "in" really holds
+ * XT_ALIGN(targetsize) bytes -- confirm the caller guarantees it.
+ */
+static void target_compat_from_user(struct xt_target *t, void *in, void *out)
+{
+#ifdef CONFIG_COMPAT
+ if (t->compat_from_user) {
+ int pad;
+
+ t->compat_from_user(out, in);
+ pad = XT_ALIGN(t->targetsize) - t->targetsize;
+ if (pad > 0)
+ memset(out + t->targetsize, 0, pad);
+ } else
+#endif
+ /* body of the else above when CONFIG_COMPAT is set, unconditional otherwise */
+ memcpy(out, in, XT_ALIGN(t->targetsize));
+}
+
+/*
+ * Extra bytes added to the expression size for this target: the value of
+ * xt_compat_target_offset() when CONFIG_COMPAT is enabled, 0 otherwise.
+ */
+static inline int nft_compat_target_offset(struct xt_target *target)
+{
+#ifdef CONFIG_COMPAT
+ return xt_compat_target_offset(target);
+#else
+ return 0;
+#endif
+}
+
+/* Policy for the attributes nested inside NFTA_RULE_COMPAT. */
+static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] = {
+	[NFTA_RULE_COMPAT_FLAGS]	= { .type = NLA_U32 },
+	[NFTA_RULE_COMPAT_PROTO]	= { .type = NLA_U32 },
+};
+
+/*
+ * Parse the nested NFTA_RULE_COMPAT attribute. Both the protocol and the
+ * flags attribute are required; flags outside NFT_RULE_COMPAT_F_MASK are
+ * rejected, *inv is set to true when NFT_RULE_COMPAT_F_INV is present, and
+ * the NFTA_RULE_COMPAT_PROTO value is returned.
+ *
+ * NOTE(review): the return type is u8, yet the failure paths return err and
+ * -EINVAL. Those negative values are truncated to 8 bits, and the callers in
+ * this file store the result directly into a u8 protocol, so a parse failure
+ * is indistinguishable from a protocol number. The 32-bit protocol attribute
+ * is narrowed to u8 the same way. A fix needs a new signature (e.g. int
+ * return plus a u8 *proto out-parameter) together with both callers.
+ */
+static u8 nft_parse_compat(const struct nlattr *attr, bool *inv)
+{
+ struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1];
+ u32 flags;
+ int err;
+
+ err = nla_parse_nested(tb, NFTA_RULE_COMPAT_MAX, attr,
+ nft_rule_compat_policy);
+ if (err < 0)
+ return err;
+
+ if (!tb[NFTA_RULE_COMPAT_PROTO] || !tb[NFTA_RULE_COMPAT_FLAGS])
+ return -EINVAL;
+
+ flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS]));
+ if (flags & ~NFT_RULE_COMPAT_F_MASK)
+ return -EINVAL;
+ if (flags & NFT_RULE_COMPAT_F_INV)
+ *inv = true;
+
+ return ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+}
+
+/*
+ * Initialise a "target" expression: copy the target data out of
+ * NFTA_TARGET_INFO, pick up the optional protocol/inversion from
+ * NFTA_RULE_COMPAT and let x_tables validate the target. Targets without a
+ * target function (the standard target) are refused with -EINVAL.
+ *
+ * Returns 0 on success. On failure the module reference held on the target
+ * is dropped and a negative errno is returned.
+ */
+static int
+nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+		const struct nlattr * const tb[])
+{
+	struct xt_target *target = expr->ops->data;
+	size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO]));
+	void *info = nft_expr_priv(expr);
+	struct xt_tgchk_param par;
+	union nft_entry e = {};
+	bool inv = false;
+	u8 proto = 0;
+	int ret;
+
+	target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info);
+
+	if (ctx->nla[NFTA_RULE_COMPAT])
+		proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv);
+
+	nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);
+
+	ret = xt_check_target(&par, size, proto, inv);
+
+	/* The standard target cannot be used */
+	if (ret >= 0 && target->target == NULL)
+		ret = -EINVAL;
+
+	if (ret < 0) {
+		module_put(target->me);
+		return ret;
+	}
+
+	return 0;
+}
+
+/* Drop the module reference held on the wrapped x_tables target. */
+static void
+nft_target_destroy(const struct nft_expr *expr)
+{
+	struct xt_target *tg = expr->ops->data;
+
+	module_put(tg->me);
+}
+
+/*
+ * Emit the target private data as NFTA_TARGET_INFO.
+ *
+ * With CONFIG_COMPAT and a compat_to_user() hook the data is first converted
+ * into a temporary buffer (under KERNEL_DS so the hook's user-copy helpers
+ * accept a kernel pointer); otherwise it is dumped as stored.
+ *
+ * Returns the nla_put() result, or -ENOMEM if the buffer cannot be
+ * allocated.
+ */
+static int
+target_dump_info(struct sk_buff *skb, const struct xt_target *t, const void *in)
+{
+	int ret;
+
+#ifdef CONFIG_COMPAT
+	if (t->compat_to_user) {
+		mm_segment_t old_fs;
+		void *out;
+
+		/*
+		 * XT_ALIGN(targetsize) bytes go to userspace below, but the
+		 * hook is not guaranteed to write all of them. Start from
+		 * zeroed memory so no stale heap contents are leaked.
+		 */
+		out = kzalloc(XT_ALIGN(t->targetsize), GFP_ATOMIC);
+		if (out == NULL)
+			return -ENOMEM;
+
+		/* We want to reuse existing compat_to_user */
+		old_fs = get_fs();
+		set_fs(KERNEL_DS);
+		t->compat_to_user(out, in);
+		set_fs(old_fs);
+		ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), out);
+		kfree(out);
+	} else
+#endif
+		ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), in);
+
+	return ret;
+}
+
+/*
+ * Dump a "target" expression: name, revision, then the private data.
+ * Returns 0 on success and -1 as soon as one attribute cannot be added.
+ */
+static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct xt_target *target = expr->ops->data;
+	void *info = nft_expr_priv(expr);
+
+	if (nla_put_string(skb, NFTA_TARGET_NAME, target->name))
+		return -1;
+	if (nla_put_be32(skb, NFTA_TARGET_REV, htonl(target->revision)))
+		return -1;
+	if (target_dump_info(skb, target, info))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Check that the target may be used from the chain of @ctx. Non-base chains
+ * are always accepted; for a base chain the chain's hook must be part of
+ * target->hooks, otherwise -EINVAL is returned.
+ */
+static int nft_target_validate(const struct nft_ctx *ctx,
+			       const struct nft_expr *expr,
+			       const struct nft_data **data)
+{
+	struct xt_target *target = expr->ops->data;
+	const struct nft_base_chain *basechain;
+	unsigned int hook_mask;
+
+	if (!(ctx->chain->flags & NFT_BASE_CHAIN))
+		return 0;
+
+	basechain = nft_base_chain(ctx->chain);
+	hook_mask = 1 << basechain->ops.hooknum;
+
+	/* This target is being called from an invalid chain */
+	if (!(hook_mask & target->hooks))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Run the wrapped x_tables match on the packet. A set hotdrop flag yields
+ * NF_DROP; otherwise a hit continues rule evaluation (NFT_CONTINUE) and a
+ * miss breaks out of the rule (NFT_BREAK).
+ */
+static void nft_match_eval(const struct nft_expr *expr,
+			   struct nft_data data[NFT_REG_MAX + 1],
+			   const struct nft_pktinfo *pkt)
+{
+	struct xt_match *match = expr->ops->data;
+	void *info = nft_expr_priv(expr);
+	struct sk_buff *skb = pkt->skb;
+	bool hit;
+
+	nft_compat_set_par((struct xt_action_param *)&pkt->xt, match, info);
+
+	hit = match->match(skb, (struct xt_action_param *)&pkt->xt);
+
+	if (pkt->xt.hotdrop) {
+		data[NFT_REG_VERDICT].verdict = NF_DROP;
+		return;
+	}
+
+	if (hit)
+		data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+	else
+		data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/* Netlink attribute policy for the "match" expression. */
+static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
+	[NFTA_MATCH_INFO]	= { .type = NLA_BINARY },
+	[NFTA_MATCH_REV]	= { .type = NLA_U32 },
+	[NFTA_MATCH_NAME]	= { .type = NLA_NUL_STRING },
+};
+
+/*
+ * Fill the xt_mtchk_param that is passed to xt_check_match().
+ * struct xt_mtchk_param and xt_tgchk_param look very similar.
+ *
+ * @par:   parameter block to fill in
+ * @ctx:   rule context; supplies table name, address family and chain
+ * @match: x_tables match being wrapped
+ * @info:  match private data (already copied from the netlink attribute)
+ * @entry: scratch ipt/ip6t entry carrying protocol and inversion flag
+ * @proto: layer 4 protocol taken from NFTA_RULE_COMPAT, 0 if none
+ * @inv:   true when the protocol match is inverted
+ */
+static void
+nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
+			  struct xt_match *match, void *info,
+			  union nft_entry *entry, u8 proto, bool inv)
+{
+	par->net	= &init_net;
+	par->table	= ctx->table->name;
+	switch (ctx->afi->family) {
+	case AF_INET:
+		entry->e4.ip.proto = proto;
+		entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
+		break;
+	case AF_INET6:
+		entry->e6.ipv6.proto = proto;
+		entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
+		break;
+	default:
+		break;
+	}
+	par->entryinfo	= entry;
+	par->match	= match;
+	par->matchinfo	= info;
+
+	/*
+	 * nft_match_init() passes an uninitialised on-stack structure, so
+	 * hook_mask has to be given a value on every path. A non-base chain
+	 * is not bound to a hook, hence the empty mask.
+	 */
+	par->hook_mask	= 0;
+	if (ctx->chain->flags & NFT_BASE_CHAIN) {
+		const struct nft_base_chain *basechain =
+						nft_base_chain(ctx->chain);
+		const struct nf_hook_ops *ops = &basechain->ops;
+
+		par->hook_mask = 1 << ops->hooknum;
+	}
+	par->family	= ctx->afi->family;
+}
+
+/*
+ * Copy match private data from the netlink attribute payload (in) into the
+ * expression private area (out).
+ *
+ * With CONFIG_COMPAT and a match that provides compat_from_user(), that hook
+ * does the conversion and the bytes between matchsize and
+ * XT_ALIGN(matchsize) are zeroed afterwards. Otherwise XT_ALIGN(matchsize)
+ * bytes are copied verbatim.
+ *
+ * NOTE(review): nothing here checks that "in" really holds
+ * XT_ALIGN(matchsize) bytes -- confirm the caller guarantees it.
+ */
+static void match_compat_from_user(struct xt_match *m, void *in, void *out)
+{
+#ifdef CONFIG_COMPAT
+ if (m->compat_from_user) {
+ int pad;
+
+ m->compat_from_user(out, in);
+ pad = XT_ALIGN(m->matchsize) - m->matchsize;
+ if (pad > 0)
+ memset(out + m->matchsize, 0, pad);
+ } else
+#endif
+ /* body of the else above when CONFIG_COMPAT is set, unconditional otherwise */
+ memcpy(out, in, XT_ALIGN(m->matchsize));
+}
+
+/*
+ * Initialise a "match" expression: copy the match data out of
+ * NFTA_MATCH_INFO, pick up the optional protocol/inversion from
+ * NFTA_RULE_COMPAT and let x_tables validate the match.
+ *
+ * Returns 0 on success. On failure the module reference held on the match
+ * is dropped and the negative errno from xt_check_match() is returned.
+ */
+static int
+nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+	       const struct nlattr * const tb[])
+{
+	struct xt_match *match = expr->ops->data;
+	size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO]));
+	void *info = nft_expr_priv(expr);
+	struct xt_mtchk_param par;
+	union nft_entry e = {};
+	bool inv = false;
+	u8 proto = 0;
+	int ret;
+
+	match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info);
+
+	if (ctx->nla[NFTA_RULE_COMPAT])
+		proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv);
+
+	nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
+
+	ret = xt_check_match(&par, size, proto, inv);
+	if (ret >= 0)
+		return 0;
+
+	module_put(match->me);
+	return ret;
+}
+
+/* Drop the module reference held on the wrapped x_tables match. */
+static void
+nft_match_destroy(const struct nft_expr *expr)
+{
+	struct xt_match *mt = expr->ops->data;
+
+	module_put(mt->me);
+}
+
+/*
+ * Emit the match private data as NFTA_MATCH_INFO.
+ *
+ * With CONFIG_COMPAT and a compat_to_user() hook the data is first converted
+ * into a temporary buffer (under KERNEL_DS so the hook's user-copy helpers
+ * accept a kernel pointer); otherwise it is dumped as stored.
+ *
+ * Returns the nla_put() result, or -ENOMEM if the buffer cannot be
+ * allocated.
+ */
+static int
+match_dump_info(struct sk_buff *skb, const struct xt_match *m, const void *in)
+{
+	int ret;
+
+#ifdef CONFIG_COMPAT
+	if (m->compat_to_user) {
+		mm_segment_t old_fs;
+		void *out;
+
+		/*
+		 * XT_ALIGN(matchsize) bytes go to userspace below, but the
+		 * hook is not guaranteed to write all of them. Start from
+		 * zeroed memory so no stale heap contents are leaked.
+		 */
+		out = kzalloc(XT_ALIGN(m->matchsize), GFP_ATOMIC);
+		if (out == NULL)
+			return -ENOMEM;
+
+		/* We want to reuse existing compat_to_user */
+		old_fs = get_fs();
+		set_fs(KERNEL_DS);
+		m->compat_to_user(out, in);
+		set_fs(old_fs);
+		ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), out);
+		kfree(out);
+	} else
+#endif
+		ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), in);
+
+	return ret;
+}
+
+/*
+ * Extra bytes added to the expression size for this match: the value of
+ * xt_compat_match_offset() when CONFIG_COMPAT is enabled, 0 otherwise.
+ */
+static inline int nft_compat_match_offset(struct xt_match *match)
+{
+#ifdef CONFIG_COMPAT
+ return xt_compat_match_offset(match);
+#else
+ return 0;
+#endif
+}
+
+/*
+ * Dump a "match" expression: name, revision, then the private data.
+ * Returns 0 on success and -1 as soon as one attribute cannot be added.
+ */
+static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct xt_match *match = expr->ops->data;
+	void *info = nft_expr_priv(expr);
+
+	if (nla_put_string(skb, NFTA_MATCH_NAME, match->name))
+		return -1;
+	if (nla_put_be32(skb, NFTA_MATCH_REV, htonl(match->revision)))
+		return -1;
+	if (match_dump_info(skb, match, info))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Check that the match may be used from the chain of @ctx. Non-base chains
+ * are always accepted; for a base chain the chain's hook must be part of
+ * match->hooks, otherwise -EINVAL is returned.
+ */
+static int nft_match_validate(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nft_data **data)
+{
+	struct xt_match *match = expr->ops->data;
+	const struct nft_base_chain *basechain;
+	unsigned int hook_mask;
+
+	if (!(ctx->chain->flags & NFT_BASE_CHAIN))
+		return 0;
+
+	basechain = nft_base_chain(ctx->chain);
+	hook_mask = 1 << basechain->ops.hooknum;
+
+	/* This match is being called from an invalid chain */
+	if (!(hook_mask & match->hooks))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Build one NFT_COMPAT netlink message in @skb carrying the extension name,
+ * revision and type (match/target). NLM_F_MULTI is set whenever @portid is
+ * non-zero.
+ *
+ * Returns skb->len on success, -1 if the header or an attribute does not
+ * fit (the partial message is cancelled).
+ */
+static int
+nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
+		      int event, u16 family, const char *name,
+		      int rev, int target)
+{
+	unsigned int flags = 0;
+	struct nfgenmsg *hdr;
+	struct nlmsghdr *nlh;
+
+	if (portid)
+		flags = NLM_F_MULTI;
+
+	event |= NFNL_SUBSYS_NFT_COMPAT << 8;
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
+	if (!nlh)
+		goto nlmsg_failure;
+
+	hdr = nlmsg_data(nlh);
+	hdr->res_id = 0;
+	hdr->version = NFNETLINK_V0;
+	hdr->nfgen_family = family;
+
+	if (nla_put_string(skb, NFTA_COMPAT_NAME, name))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_COMPAT_REV, htonl(rev)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_COMPAT_TYPE, htonl(target)))
+		goto nla_put_failure;
+
+	nlmsg_end(skb, nlh);
+	return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -1;
+}
+
+/*
+ * NFNL_MSG_COMPAT_GET handler: look up an x_tables extension by name,
+ * revision and type for the family given in the request header, and unicast
+ * a reply describing it back to the requesting port.
+ *
+ * Returns 0 on success or a negative errno: -EINVAL for missing attributes
+ * or an unsupported family, the lookup error stored by xt_find_revision(),
+ * -ENOMEM/-ENOSPC if the reply cannot be built, and -ENOBUFS when the
+ * unicast reports -EAGAIN.
+ */
+static int
+nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+{
+ int ret = 0, target;
+ struct nfgenmsg *nfmsg;
+ const char *fmt;
+ const char *name;
+ u32 rev;
+ struct sk_buff *skb2;
+
+ if (tb[NFTA_COMPAT_NAME] == NULL ||
+ tb[NFTA_COMPAT_REV] == NULL ||
+ tb[NFTA_COMPAT_TYPE] == NULL)
+ return -EINVAL;
+
+ name = nla_data(tb[NFTA_COMPAT_NAME]);
+ rev = ntohl(nla_get_be32(tb[NFTA_COMPAT_REV]));
+ target = ntohl(nla_get_be32(tb[NFTA_COMPAT_TYPE]));
+
+ nfmsg = nlmsg_data(nlh);
+
+ /* fmt is the module name pattern used if the extension must be loaded */
+ switch(nfmsg->nfgen_family) {
+ case AF_INET:
+ fmt = "ipt_%s";
+ break;
+ case AF_INET6:
+ fmt = "ip6t_%s";
+ break;
+ default:
+ pr_err("nft_compat: unsupported protocol %d\n",
+ nfmsg->nfgen_family);
+ return -EINVAL;
+ }
+
+ /* the lookup reports its outcome through ret, which is checked below */
+ try_then_request_module(xt_find_revision(nfmsg->nfgen_family, name,
+ rev, target, &ret),
+ fmt, name);
+
+ if (ret < 0)
+ return ret;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL)
+ return -ENOMEM;
+
+ /* include the best revision for this extension in the message */
+ if (nfnl_compat_fill_info(skb2, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(nlh->nlmsg_type),
+ NFNL_MSG_COMPAT_GET,
+ nfmsg->nfgen_family,
+ name, ret, target) <= 0) {
+ kfree_skb(skb2);
+ return -ENOSPC;
+ }
+
+ ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
+ MSG_DONTWAIT);
+ /* a positive return from the unicast is reported as plain success */
+ if (ret > 0)
+ ret = 0;
+
+ return ret == -EAGAIN ? -ENOBUFS : ret;
+}
+
+/* Attribute policy for NFNL_MSG_COMPAT_GET requests. */
+static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
+	[NFTA_COMPAT_TYPE]	= { .type = NLA_U32 },
+	[NFTA_COMPAT_REV]	= { .type = NLA_U32 },
+	[NFTA_COMPAT_NAME]	= { .type = NLA_NUL_STRING,
+				    .len = NFT_COMPAT_NAME_MAX-1 },
+};
+
+/* nfnetlink message dispatch table; only the GET message is implemented. */
+static const struct nfnl_callback nfnl_nft_compat_cb[NFNL_MSG_COMPAT_MAX] = {
+	[NFNL_MSG_COMPAT_GET] = {
+		.call		= nfnl_compat_get,
+		.policy		= nfnl_compat_policy_get,
+		.attr_count	= NFTA_COMPAT_MAX,
+	},
+};
+
+/* nfnetlink subsystem descriptor for the nft compat interface. */
+static const struct nfnetlink_subsystem nfnl_compat_subsys = {
+	.name		= "nft-compat",
+	.subsys_id	= NFNL_SUBSYS_NFT_COMPAT,
+	.cb		= nfnl_nft_compat_cb,
+	.cb_count	= NFNL_MSG_COMPAT_MAX,
+};
+
+/* All nft_xt wrappers allocated so far for x_tables matches. */
+static LIST_HEAD(nft_match_list);
+
+/*
+ * One dynamically allocated set of expression ops per x_tables extension
+ * in use; ops.data points at the xt_match or xt_target being wrapped.
+ */
+struct nft_xt {
+ struct list_head head;
+ struct nft_expr_ops ops;
+};
+
+static struct nft_expr_type nft_match_type;
+
+/*
+ * Find or create the expression ops wrapping the x_tables match named by
+ * NFTA_MATCH_NAME / NFTA_MATCH_REV for the family of @ctx.
+ *
+ * On success the returned ops carry one module reference on the match for
+ * the expression being created; nft_match_init() drops it on failure and
+ * nft_match_destroy() drops it on teardown.
+ *
+ * Returns the ops, or ERR_PTR(-EINVAL) for missing attributes,
+ * ERR_PTR(-ENOENT) if the match is unavailable, ERR_PTR(-ENOMEM) on
+ * allocation failure.
+ */
+static const struct nft_expr_ops *
+nft_match_select_ops(const struct nft_ctx *ctx,
+		     const struct nlattr * const tb[])
+{
+	struct nft_xt *nft_match;
+	struct xt_match *match;
+	char *mt_name;
+	__u32 rev, family;
+
+	if (tb[NFTA_MATCH_NAME] == NULL ||
+	    tb[NFTA_MATCH_REV] == NULL ||
+	    tb[NFTA_MATCH_INFO] == NULL)
+		return ERR_PTR(-EINVAL);
+
+	mt_name = nla_data(tb[NFTA_MATCH_NAME]);
+	rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV]));
+	family = ctx->afi->family;
+
+	/* Re-use the existing match if it's already loaded. */
+	list_for_each_entry(nft_match, &nft_match_list, head) {
+		match = nft_match->ops.data;
+
+		if (strcmp(match->name, mt_name) != 0 ||
+		    match->revision != rev || match->family != family)
+			continue;
+
+		/*
+		 * Every expression drops one module reference when it is
+		 * destroyed (or fails to initialise), so a re-used entry
+		 * must take one as well or the count underflows.
+		 */
+		if (!try_module_get(match->me))
+			return ERR_PTR(-ENOENT);
+
+		return &nft_match->ops;
+	}
+
+	match = xt_request_find_match(family, mt_name, rev);
+	if (IS_ERR(match))
+		return ERR_PTR(-ENOENT);
+
+	/* This is the first time we use this match, allocate operations */
+	nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
+	if (nft_match == NULL) {
+		/* give back the reference taken by the lookup above */
+		module_put(match->me);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	nft_match->ops.type = &nft_match_type;
+	nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize) +
+					    nft_compat_match_offset(match));
+	nft_match->ops.eval = nft_match_eval;
+	nft_match->ops.init = nft_match_init;
+	nft_match->ops.destroy = nft_match_destroy;
+	nft_match->ops.dump = nft_match_dump;
+	nft_match->ops.validate = nft_match_validate;
+	nft_match->ops.data = match;
+
+	list_add(&nft_match->head, &nft_match_list);
+
+	return &nft_match->ops;
+}
+
+/*
+ * Free every match wrapper on nft_match_list. The _safe iterator is needed
+ * because each entry is freed while walking: the plain iterator would read
+ * the next pointer out of memory that was just released.
+ */
+static void nft_match_release(void)
+{
+	struct nft_xt *nft_match, *tmp;
+
+	list_for_each_entry_safe(nft_match, tmp, &nft_match_list, head) {
+		list_del(&nft_match->head);
+		kfree(nft_match);
+	}
+}
+
+/* Expression type "match"; ops are created per x_tables match on demand. */
+static struct nft_expr_type nft_match_type __read_mostly = {
+	.name		= "match",
+	.owner		= THIS_MODULE,
+	.policy		= nft_match_policy,
+	.maxattr	= NFTA_MATCH_MAX,
+	.select_ops	= nft_match_select_ops,
+};
+
+/* All nft_xt wrappers allocated so far for x_tables targets. */
+static LIST_HEAD(nft_target_list);
+
+static struct nft_expr_type nft_target_type;
+
+/*
+ * Find or create the expression ops wrapping the x_tables target named by
+ * NFTA_TARGET_NAME / NFTA_TARGET_REV for the family of @ctx.
+ *
+ * On success the returned ops carry one module reference on the target for
+ * the expression being created; nft_target_init() drops it on failure and
+ * nft_target_destroy() drops it on teardown.
+ *
+ * Returns the ops, or ERR_PTR(-EINVAL) for missing attributes,
+ * ERR_PTR(-ENOENT) if the target is unavailable, ERR_PTR(-ENOMEM) on
+ * allocation failure.
+ */
+static const struct nft_expr_ops *
+nft_target_select_ops(const struct nft_ctx *ctx,
+		      const struct nlattr * const tb[])
+{
+	struct nft_xt *nft_target;
+	struct xt_target *target;
+	char *tg_name;
+	__u32 rev, family;
+
+	if (tb[NFTA_TARGET_NAME] == NULL ||
+	    tb[NFTA_TARGET_REV] == NULL ||
+	    tb[NFTA_TARGET_INFO] == NULL)
+		return ERR_PTR(-EINVAL);
+
+	tg_name = nla_data(tb[NFTA_TARGET_NAME]);
+	rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV]));
+	family = ctx->afi->family;
+
+	/*
+	 * Re-use the existing target if it's already loaded. This must walk
+	 * the target list: entries on nft_match_list wrap struct xt_match
+	 * and would be misread as targets.
+	 */
+	list_for_each_entry(nft_target, &nft_target_list, head) {
+		target = nft_target->ops.data;
+
+		if (strcmp(target->name, tg_name) != 0 ||
+		    target->revision != rev || target->family != family)
+			continue;
+
+		/*
+		 * Every expression drops one module reference when it is
+		 * destroyed (or fails to initialise), so a re-used entry
+		 * must take one as well or the count underflows.
+		 */
+		if (!try_module_get(target->me))
+			return ERR_PTR(-ENOENT);
+
+		return &nft_target->ops;
+	}
+
+	target = xt_request_find_target(family, tg_name, rev);
+	if (IS_ERR(target))
+		return ERR_PTR(-ENOENT);
+
+	/* This is the first time we use this target, allocate operations */
+	nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
+	if (nft_target == NULL) {
+		/* give back the reference taken by the lookup above */
+		module_put(target->me);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	nft_target->ops.type = &nft_target_type;
+	nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize) +
+					     nft_compat_target_offset(target));
+	nft_target->ops.eval = nft_target_eval;
+	nft_target->ops.init = nft_target_init;
+	nft_target->ops.destroy = nft_target_destroy;
+	nft_target->ops.dump = nft_target_dump;
+	nft_target->ops.validate = nft_target_validate;
+	nft_target->ops.data = target;
+
+	list_add(&nft_target->head, &nft_target_list);
+
+	return &nft_target->ops;
+}
+
+/*
+ * Free every target wrapper on nft_target_list. The _safe iterator is
+ * needed because each entry is freed while walking: the plain iterator
+ * would read the next pointer out of memory that was just released.
+ */
+static void nft_target_release(void)
+{
+	struct nft_xt *nft_target, *tmp;
+
+	list_for_each_entry_safe(nft_target, tmp, &nft_target_list, head) {
+		list_del(&nft_target->head);
+		kfree(nft_target);
+	}
+}
+
+/* Expression type "target"; ops are created per x_tables target on demand. */
+static struct nft_expr_type nft_target_type __read_mostly = {
+	.name		= "target",
+	.owner		= THIS_MODULE,
+	.policy		= nft_target_policy,
+	.maxattr	= NFTA_TARGET_MAX,
+	.select_ops	= nft_target_select_ops,
+};
+
+/*
+ * Module init: register the "match" and "target" expression types, then the
+ * nfnetlink subsystem. Whatever was registered is rolled back in reverse
+ * order if a later step fails.
+ */
+static int __init nft_compat_module_init(void)
+{
+	int ret;
+
+	ret = nft_register_expr(&nft_match_type);
+	if (ret < 0)
+		return ret;
+
+	ret = nft_register_expr(&nft_target_type);
+	if (ret < 0) {
+		nft_unregister_expr(&nft_match_type);
+		return ret;
+	}
+
+	ret = nfnetlink_subsys_register(&nfnl_compat_subsys);
+	if (ret < 0) {
+		pr_err("nft_compat: cannot register with nfnetlink.\n");
+		nft_unregister_expr(&nft_target_type);
+		nft_unregister_expr(&nft_match_type);
+		return ret;
+	}
+
+	pr_info("nf_tables_compat: (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>\n");
+
+	return ret;
+}
+
+/*
+ * Module exit: unregister in the reverse order of nft_compat_module_init(),
+ * then free the cached match and target wrappers.
+ */
+static void __exit nft_compat_module_exit(void)
+{
+ nfnetlink_subsys_unregister(&nfnl_compat_subsys);
+ nft_unregister_expr(&nft_target_type);
+ nft_unregister_expr(&nft_match_type);
+ /* the wrappers are only freed once both types are unregistered */
+ nft_match_release();
+ nft_target_release();
+}
+
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT);
+
+module_init(nft_compat_module_init);
+module_exit(nft_compat_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("match");
+MODULE_ALIAS_NFT_EXPR("target");
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
new file mode 100644
index 000000000000..c89ee486ce54
--- /dev/null
+++ b/net/netfilter/nft_counter.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/seqlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/* Private data of a "counter" expression. */
+struct nft_counter {
+ seqlock_t lock; /* taken for writing in eval, read with seq retry in dump */
+ u64 bytes; /* sum of skb->len over all evaluated packets */
+ u64 packets; /* number of evaluated packets */
+};
+
+/* Account one packet and its length under the counter's write seqlock. */
+static void nft_counter_eval(const struct nft_expr *expr,
+			     struct nft_data data[NFT_REG_MAX + 1],
+			     const struct nft_pktinfo *pkt)
+{
+	struct nft_counter *counter = nft_expr_priv(expr);
+
+	write_seqlock_bh(&counter->lock);
+	counter->packets++;
+	counter->bytes += pkt->skb->len;
+	write_sequnlock_bh(&counter->lock);
+}
+
+/*
+ * Dump a consistent snapshot of the byte and packet counters as big-endian
+ * 64-bit attributes. Returns 0 on success, -1 if an attribute does not fit.
+ */
+static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_counter *counter = nft_expr_priv(expr);
+	u64 nbytes, npackets;
+	unsigned int seq;
+
+	/* retry until both values were read without a concurrent update */
+	do {
+		seq = read_seqbegin(&counter->lock);
+		nbytes = counter->bytes;
+		npackets = counter->packets;
+	} while (read_seqretry(&counter->lock, seq));
+
+	if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(nbytes)) ||
+	    nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(npackets)))
+		return -1;
+
+	return 0;
+}
+
+/* Netlink attribute policy for the "counter" expression. */
+static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
+	[NFTA_COUNTER_BYTES]	= { .type = NLA_U64 },
+	[NFTA_COUNTER_PACKETS]	= { .type = NLA_U64 },
+};
+
+/*
+ * Initialise a "counter" expression. The optional NFTA_COUNTER_PACKETS and
+ * NFTA_COUNTER_BYTES attributes preload the respective counters; fields
+ * without an attribute are left as they are. Always returns 0.
+ */
+static int nft_counter_init(const struct nft_ctx *ctx,
+			    const struct nft_expr *expr,
+			    const struct nlattr * const tb[])
+{
+	struct nft_counter *counter = nft_expr_priv(expr);
+
+	seqlock_init(&counter->lock);
+
+	if (tb[NFTA_COUNTER_BYTES])
+		counter->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+	if (tb[NFTA_COUNTER_PACKETS])
+		counter->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+
+	return 0;
+}
+
+static struct nft_expr_type nft_counter_type;
+
+/* Callbacks and private-data size of the "counter" expression. */
+static const struct nft_expr_ops nft_counter_ops = {
+	.type		= &nft_counter_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_counter)),
+	.init		= nft_counter_init,
+	.eval		= nft_counter_eval,
+	.dump		= nft_counter_dump,
+};
+
+/* Expression type "counter" with a single fixed set of ops. */
+static struct nft_expr_type nft_counter_type __read_mostly = {
+	.name		= "counter",
+	.owner		= THIS_MODULE,
+	.ops		= &nft_counter_ops,
+	.policy		= nft_counter_policy,
+	.maxattr	= NFTA_COUNTER_MAX,
+};
+
+/* Register the "counter" expression type. */
+static int __init nft_counter_module_init(void)
+{
+	return nft_register_expr(&nft_counter_type);
+}
+
+/* Unregister the "counter" expression type. */
+static void __exit nft_counter_module_exit(void)
+{
+	nft_unregister_expr(&nft_counter_type);
+}
+
+module_init(nft_counter_module_init);
+module_exit(nft_counter_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("counter");
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
new file mode 100644
index 000000000000..955f4e6e7089
--- /dev/null
+++ b/net/netfilter/nft_ct.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+
+/* Private data of a "ct" expression. */
+struct nft_ct {
+ enum nft_ct_keys key:8; /* which conntrack property to load */
+ enum ip_conntrack_dir dir:8; /* tuple direction, for tuple-based keys */
+ enum nft_registers dreg:8; /* destination register */
+ uint8_t family; /* family passed to the l3proto module get/put */
+};
+
+/*
+ * Load the conntrack property selected by priv->key into the destination
+ * register.
+ *
+ * NFT_CT_STATE is answered even without a conntrack entry (invalid state).
+ * Every other key needs one; if it is missing, or the requested helper
+ * information is unavailable, the verdict is set to NFT_BREAK.
+ */
+static void nft_ct_eval(const struct nft_expr *expr,
+			struct nft_data data[NFT_REG_MAX + 1],
+			const struct nft_pktinfo *pkt)
+{
+	const struct nft_ct *priv = nft_expr_priv(expr);
+	struct nft_data *dest = &data[priv->dreg];
+	enum ip_conntrack_info ctinfo;
+	const struct nf_conn *ct;
+	const struct nf_conn_help *help;
+	const struct nf_conntrack_tuple *tuple;
+	const struct nf_conntrack_helper *helper;
+	long diff;
+	unsigned int state;
+
+	ct = nf_ct_get(pkt->skb, &ctinfo);
+
+	switch (priv->key) {
+	case NFT_CT_STATE:
+		if (ct == NULL)
+			state = NF_CT_STATE_INVALID_BIT;
+		else if (nf_ct_is_untracked(ct))
+			state = NF_CT_STATE_UNTRACKED_BIT;
+		else
+			state = NF_CT_STATE_BIT(ctinfo);
+		dest->data[0] = state;
+		return;
+	default:
+		break;
+	}
+
+	if (ct == NULL)
+		goto err;
+
+	switch (priv->key) {
+	case NFT_CT_DIRECTION:
+		dest->data[0] = CTINFO2DIR(ctinfo);
+		return;
+	case NFT_CT_STATUS:
+		dest->data[0] = ct->status;
+		return;
+#ifdef CONFIG_NF_CONNTRACK_MARK
+	case NFT_CT_MARK:
+		dest->data[0] = ct->mark;
+		return;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+	case NFT_CT_SECMARK:
+		dest->data[0] = ct->secmark;
+		return;
+#endif
+	case NFT_CT_EXPIRATION:
+		/*
+		 * Remaining lifetime: expiry time minus now. The reverse
+		 * subtraction is negative for every entry that has not yet
+		 * expired and would be clamped to 0 below.
+		 */
+		diff = (long)ct->timeout.expires - (long)jiffies;
+		if (diff < 0)
+			diff = 0;
+		dest->data[0] = jiffies_to_msecs(diff);
+		return;
+	case NFT_CT_HELPER:
+		if (ct->master == NULL)
+			goto err;
+		help = nfct_help(ct->master);
+		if (help == NULL)
+			goto err;
+		helper = rcu_dereference(help->helper);
+		if (helper == NULL)
+			goto err;
+		/* the name plus its terminator must fit into the register */
+		if (strlen(helper->name) >= sizeof(dest->data))
+			goto err;
+		strncpy((char *)dest->data, helper->name, sizeof(dest->data));
+		return;
+	default:
+		break;
+	}
+
+	/* the remaining keys read from the tuple of the requested direction */
+	tuple = &ct->tuplehash[priv->dir].tuple;
+	switch (priv->key) {
+	case NFT_CT_L3PROTOCOL:
+		dest->data[0] = nf_ct_l3num(ct);
+		return;
+	case NFT_CT_SRC:
+		memcpy(dest->data, tuple->src.u3.all,
+		       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+		return;
+	case NFT_CT_DST:
+		memcpy(dest->data, tuple->dst.u3.all,
+		       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
+		return;
+	case NFT_CT_PROTOCOL:
+		dest->data[0] = nf_ct_protonum(ct);
+		return;
+	case NFT_CT_PROTO_SRC:
+		dest->data[0] = (__force __u16)tuple->src.u.all;
+		return;
+	case NFT_CT_PROTO_DST:
+		dest->data[0] = (__force __u16)tuple->dst.u.all;
+		return;
+	default:
+		break;
+	}
+	return;
+err:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/* Netlink attribute policy for the "ct" expression. */
+static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
+	[NFTA_CT_KEY]		= { .type = NLA_U32 },
+	[NFTA_CT_DREG]		= { .type = NLA_U32 },
+	[NFTA_CT_DIRECTION]	= { .type = NLA_U8 },
+};
+
+/*
+ * Initialise a "ct" expression from its netlink attributes.
+ *
+ * NFTA_CT_DREG and NFTA_CT_KEY are mandatory. NFTA_CT_DIRECTION must be
+ * present for tuple-based keys and absent for all others. A reference on
+ * the conntrack l3proto module of the rule's family is taken and released
+ * again if register validation fails.
+ *
+ * Returns 0 on success, -EINVAL for bad attributes, -EOPNOTSUPP for an
+ * unknown key, or the error from the module get / register validation.
+ */
+static int nft_ct_init(const struct nft_ctx *ctx,
+		       const struct nft_expr *expr,
+		       const struct nlattr * const tb[])
+{
+	struct nft_ct *priv = nft_expr_priv(expr);
+	int err;
+
+	if (tb[NFTA_CT_DREG] == NULL ||
+	    tb[NFTA_CT_KEY] == NULL)
+		return -EINVAL;
+
+	priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
+	if (tb[NFTA_CT_DIRECTION] != NULL) {
+		priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
+		switch (priv->dir) {
+		case IP_CT_DIR_ORIGINAL:
+		case IP_CT_DIR_REPLY:
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	switch (priv->key) {
+	case NFT_CT_STATE:
+	case NFT_CT_DIRECTION:
+	case NFT_CT_STATUS:
+#ifdef CONFIG_NF_CONNTRACK_MARK
+	case NFT_CT_MARK:
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+	case NFT_CT_SECMARK:
+#endif
+	case NFT_CT_EXPIRATION:
+	case NFT_CT_HELPER:
+		if (tb[NFTA_CT_DIRECTION] != NULL)
+			return -EINVAL;
+		break;
+	/*
+	 * nft_ct_eval() serves NFT_CT_L3PROTOCOL together with the other
+	 * tuple-based keys, so it is accepted here under the same rule.
+	 */
+	case NFT_CT_L3PROTOCOL:
+	case NFT_CT_PROTOCOL:
+	case NFT_CT_SRC:
+	case NFT_CT_DST:
+	case NFT_CT_PROTO_SRC:
+	case NFT_CT_PROTO_DST:
+		if (tb[NFTA_CT_DIRECTION] == NULL)
+			return -EINVAL;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	err = nf_ct_l3proto_try_module_get(ctx->afi->family);
+	if (err < 0)
+		return err;
+	priv->family = ctx->afi->family;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		goto err1;
+
+	err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+	if (err < 0)
+		goto err1;
+	return 0;
+
+err1:
+	nf_ct_l3proto_module_put(ctx->afi->family);
+	return err;
+}
+
+/* Release the l3proto module reference taken in nft_ct_init(). */
+static void nft_ct_destroy(const struct nft_expr *expr)
+{
+	struct nft_ct *ct_priv = nft_expr_priv(expr);
+
+	nf_ct_l3proto_module_put(ct_priv->family);
+}
+
+/*
+ * Dump a "ct" expression: destination register, key and, for tuple-based
+ * keys only, the direction. Returns 0 on success, -1 if an attribute does
+ * not fit.
+ */
+static int nft_ct_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_ct *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_CT_DREG, htonl(priv->dreg)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
+		goto nla_put_failure;
+
+	/*
+	 * nft_ct_init() rejects NFTA_CT_DIRECTION for keys that do not take
+	 * a direction, so emitting it unconditionally would produce a dump
+	 * that cannot be loaded again.
+	 */
+	switch (priv->key) {
+	case NFT_CT_L3PROTOCOL:
+	case NFT_CT_PROTOCOL:
+	case NFT_CT_SRC:
+	case NFT_CT_DST:
+	case NFT_CT_PROTO_SRC:
+	case NFT_CT_PROTO_DST:
+		if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
+			goto nla_put_failure;
+		break;
+	default:
+		break;
+	}
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_ct_type;
+
+/* Callbacks and private-data size of the "ct" expression. */
+static const struct nft_expr_ops nft_ct_ops = {
+	.type		= &nft_ct_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ct)),
+	.init		= nft_ct_init,
+	.destroy	= nft_ct_destroy,
+	.eval		= nft_ct_eval,
+	.dump		= nft_ct_dump,
+};
+
+/* Expression type "ct" with a single fixed set of ops. */
+static struct nft_expr_type nft_ct_type __read_mostly = {
+	.name		= "ct",
+	.owner		= THIS_MODULE,
+	.ops		= &nft_ct_ops,
+	.policy		= nft_ct_policy,
+	.maxattr	= NFTA_CT_MAX,
+};
+
+/* Register the "ct" expression type. */
+static int __init nft_ct_module_init(void)
+{
+	return nft_register_expr(&nft_ct_type);
+}
+
+/* Unregister the "ct" expression type. */
+static void __exit nft_ct_module_exit(void)
+{
+	nft_unregister_expr(&nft_ct_type);
+}
+
+module_init(nft_ct_module_init);
+module_exit(nft_ct_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("ct");
diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c
new file mode 100644
index 000000000000..b6eed4d5a096
--- /dev/null
+++ b/net/netfilter/nft_expr_template.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * Private data of the skeleton "template" expression; intentionally empty.
+ * NOTE(review): nft_template_dump() reads priv->field, which is not
+ * declared here -- add the member when deriving a real expression.
+ */
+struct nft_template {
+
+};
+
+/* Skeleton eval callback: fetches the private data and does nothing else. */
+static void nft_template_eval(const struct nft_expr *expr,
+			      struct nft_data data[NFT_REG_MAX + 1],
+			      const struct nft_pktinfo *pkt)
+{
+	struct nft_template *priv = nft_expr_priv(expr);
+
+}
+
+/* Netlink attribute policy for the skeleton "template" expression. */
+static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = {
+	[NFTA_TEMPLATE_ATTR]	= { .type = NLA_U32 },
+};
+
+/* Skeleton init callback: nothing to set up, always returns 0. */
+static int nft_template_init(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr,
+			     const struct nlattr * const tb[])
+{
+	struct nft_template *priv = nft_expr_priv(expr);
+
+	return 0;
+}
+
+/*
+ * Skeleton destroy callback: nothing to release.
+ *
+ * NOTE(review): the other destroy callbacks in this patch (nft_ct_destroy,
+ * nft_target_destroy, nft_match_destroy) take only the expression, while
+ * this one also takes a context -- confirm against the .destroy prototype
+ * before copying this template.
+ */
+static void nft_template_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_template *priv = nft_expr_priv(expr);
+
+}
+
+/*
+ * Skeleton dump callback: emit NFTA_TEMPLATE_ATTR. Returns 0 on success,
+ * -1 if the attribute does not fit.
+ *
+ * Written with nla_put_be32() and an explicit goto, the form every other
+ * dump callback in this patch uses, instead of the NLA_PUT_BE32() macro.
+ *
+ * NOTE(review): priv->field is a placeholder; struct nft_template declares
+ * no members, so a real expression has to add the field it dumps.
+ */
+static int nft_template_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_template *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_TEMPLATE_ATTR, priv->field))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_template_type;
+
+/* Callbacks and private-data size of the skeleton "template" expression. */
+static const struct nft_expr_ops nft_template_ops = {
+	.type		= &nft_template_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_template)),
+	.init		= nft_template_init,
+	.destroy	= nft_template_destroy,
+	.eval		= nft_template_eval,
+	.dump		= nft_template_dump,
+};
+
+/* Expression type "template" with a single fixed set of ops. */
+static struct nft_expr_type nft_template_type __read_mostly = {
+	.name		= "template",
+	.owner		= THIS_MODULE,
+	.ops		= &nft_template_ops,
+	.policy		= nft_template_policy,
+	.maxattr	= NFTA_TEMPLATE_MAX,
+};
+
+/* Register the "template" expression type. */
+static int __init nft_template_module_init(void)
+{
+	return nft_register_expr(&nft_template_type);
+}
+
+/* Unregister the "template" expression type. */
+static void __exit nft_template_module_exit(void)
+{
+	nft_unregister_expr(&nft_template_type);
+}
+
+module_init(nft_template_module_init);
+module_exit(nft_template_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("template");
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
new file mode 100644
index 000000000000..8e0bb75e7c51
--- /dev/null
+++ b/net/netfilter/nft_exthdr.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+// FIXME:
+#include <net/ipv6.h>
+
+/* Private data of an exthdr expression. */
+struct nft_exthdr {
+	u8			type;	/* header type handed to ipv6_find_hdr() */
+	u8			offset;	/* byte offset added to the header position */
+	u8			len;	/* number of bytes copied to the register */
+	enum nft_registers	dreg:8;	/* destination register */
+};
+
+/*
+ * Copy priv->len bytes of the IPv6 extension header priv->type, starting
+ * priv->offset bytes into that header, to register priv->dreg.
+ *
+ * When the header cannot be found, or the bytes cannot be copied out of the
+ * packet, the verdict register is set to NFT_BREAK instead.
+ */
+static void nft_exthdr_eval(const struct nft_expr *expr,
+			    struct nft_data data[NFT_REG_MAX + 1],
+			    const struct nft_pktinfo *pkt)
+{
+	struct nft_exthdr *priv = nft_expr_priv(expr);
+	struct nft_data *dest = &data[priv->dreg];
+	/*
+	 * ipv6_find_hdr() interprets a non-zero *offset as the position to
+	 * start searching from, so it must not be handed an uninitialised
+	 * stack value.
+	 */
+	unsigned int offset = 0;
+	int err;
+
+	err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL);
+	if (err < 0)
+		goto err;
+	offset += priv->offset;
+
+	if (skb_copy_bits(pkt->skb, offset, dest->data, priv->len) < 0)
+		goto err;
+	return;
+err:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/* Netlink attribute policy of the exthdr expression. */
+static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
+	[NFTA_EXTHDR_TYPE]	= { .type = NLA_U8 },
+	[NFTA_EXTHDR_OFFSET]	= { .type = NLA_U32 },
+	[NFTA_EXTHDR_LEN]	= { .type = NLA_U32 },
+	[NFTA_EXTHDR_DREG]	= { .type = NLA_U32 },
+};
+
+/*
+ * Parse and validate the netlink attributes of an exthdr expression.
+ *
+ * All of DREG, TYPE, OFFSET and LEN are mandatory. Returns 0 on success,
+ * -EINVAL for missing attributes or out-of-range offset/length, or the error
+ * reported by the register validation helpers.
+ */
+static int nft_exthdr_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
+{
+	struct nft_exthdr *priv = nft_expr_priv(expr);
+	u32 offset, len;
+	int err;
+
+	if (tb[NFTA_EXTHDR_DREG] == NULL ||
+	    tb[NFTA_EXTHDR_TYPE] == NULL ||
+	    tb[NFTA_EXTHDR_OFFSET] == NULL ||
+	    tb[NFTA_EXTHDR_LEN] == NULL)
+		return -EINVAL;
+
+	offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
+	len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
+
+	/*
+	 * Validate the full 32 bit values: the private fields are u8, so a
+	 * check done after the assignment would accept e.g. a length of
+	 * 0x104 as 4 and silently wrap large offsets.
+	 */
+	if (offset > 255)
+		return -EINVAL;
+	if (len == 0 ||
+	    len > FIELD_SIZEOF(struct nft_data, data))
+		return -EINVAL;
+
+	priv->type   = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
+	priv->offset = offset;
+	priv->len    = len;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_EXTHDR_DREG]));
+	err = nft_validate_output_register(priv->dreg);
+	if (err < 0)
+		return err;
+	return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+}
+
+/*
+ * Dump DREG, TYPE, OFFSET and LEN (in that order) into skb.
+ * Returns 0 on success, -1 as soon as one attribute cannot be added.
+ */
+static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_exthdr *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_EXTHDR_DREG, htonl(priv->dreg)) ||
+	    nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type) ||
+	    nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset)) ||
+	    nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len)))
+		return -1;
+
+	return 0;
+}
+
+/* Forward declaration: the ops table and the type refer to each other. */
+static struct nft_expr_type nft_exthdr_type;
+
+/* Callbacks and private data size of the exthdr expression. */
+static const struct nft_expr_ops nft_exthdr_ops = {
+	.type		= &nft_exthdr_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+	.init		= nft_exthdr_init,
+	.eval		= nft_exthdr_eval,
+	.dump		= nft_exthdr_dump,
+};
+
+/* Expression type registered under the name "exthdr". */
+static struct nft_expr_type nft_exthdr_type __read_mostly = {
+	.name		= "exthdr",
+	.owner		= THIS_MODULE,
+	.ops		= &nft_exthdr_ops,
+	.policy		= nft_exthdr_policy,
+	.maxattr	= NFTA_EXTHDR_MAX,
+};
+
+/* Module entry point: register the "exthdr" expression type. */
+static int __init nft_exthdr_module_init(void)
+{
+	int err;
+
+	err = nft_register_expr(&nft_exthdr_type);
+	return err;
+}
+
+/* Module exit point: unregister the "exthdr" expression type. */
+static void __exit nft_exthdr_module_exit(void)
+{
+	nft_unregister_expr(&nft_exthdr_type);
+}
+
+module_init(nft_exthdr_module_init);
+module_exit(nft_exthdr_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("exthdr");
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
new file mode 100644
index 000000000000..3d3f8fce10a5
--- /dev/null
+++ b/net/netfilter/nft_hash.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/* Private data of a hash set. */
+struct nft_hash {
+	struct hlist_head	*hash;	/* bucket array */
+	unsigned int		hsize;	/* number of buckets in hash[] */
+};
+
+/* One set element, linked into a bucket of struct nft_hash. */
+struct nft_hash_elem {
+	struct hlist_node	hnode;	/* bucket linkage */
+	struct nft_data		key;	/* element key */
+	struct nft_data		data[];	/* one entry, allocated for NFT_SET_MAP sets only */
+};
+
+/* Random jhash seed shared by all hash sets, filled in by nft_hash_init(). */
+static u32 nft_hash_rnd __read_mostly;
+/* Set once nft_hash_rnd has been seeded. */
+static bool nft_hash_rnd_initted __read_mostly;
+
+/*
+ * Compute the bucket index for a key.
+ *
+ * The first len bytes of data are hashed with jhash() and the 32 bit result
+ * is scaled into the range [0, hsize) with a multiply and shift.
+ */
+static unsigned int nft_hash_data(const struct nft_data *data,
+				  unsigned int hsize, unsigned int len)
+{
+	u64 scaled = jhash(data->data, len, nft_hash_rnd);
+
+	scaled *= hsize;
+	return scaled >> 32;
+}
+
+/*
+ * Look up key in the set.
+ *
+ * Returns true when an element with an equal key (over set->klen bytes)
+ * exists; for NFT_SET_MAP sets the element's data is copied to *data first.
+ * Returns false when no element matches.
+ */
+static bool nft_hash_lookup(const struct nft_set *set,
+			    const struct nft_data *key,
+			    struct nft_data *data)
+{
+	const struct nft_hash *priv = nft_set_priv(set);
+	const struct nft_hash_elem *cur;
+	unsigned int bucket = nft_hash_data(key, priv->hsize, set->klen);
+
+	hlist_for_each_entry(cur, &priv->hash[bucket], hnode) {
+		if (!nft_data_cmp(&cur->key, key, set->klen)) {
+			if (set->flags & NFT_SET_MAP)
+				nft_data_copy(data, cur->data);
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Release an element: uninit its key, uninit its data for NFT_SET_MAP sets,
+ * then free the element itself.
+ */
+static void nft_hash_elem_destroy(const struct nft_set *set,
+				  struct nft_hash_elem *he)
+{
+	const bool has_data = set->flags & NFT_SET_MAP;
+
+	nft_data_uninit(&he->key, NFT_DATA_VALUE);
+	if (has_data)
+		nft_data_uninit(he->data, set->dtype);
+
+	kfree(he);
+}
+
+/*
+ * Add a new element to the set.
+ *
+ * Element flags are not supported (-EINVAL). The element is allocated with
+ * room for one data entry when the set is a map, filled from elem and linked
+ * at the head of its bucket. Returns 0 on success or -ENOMEM.
+ */
+static int nft_hash_insert(const struct nft_set *set,
+			   const struct nft_set_elem *elem)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	const bool is_map = set->flags & NFT_SET_MAP;
+	struct nft_hash_elem *new;
+	unsigned int bytes, bucket;
+
+	if (elem->flags != 0)
+		return -EINVAL;
+
+	bytes = sizeof(*new);
+	if (is_map)
+		bytes += sizeof(new->data[0]);
+
+	new = kzalloc(bytes, GFP_KERNEL);
+	if (new == NULL)
+		return -ENOMEM;
+
+	nft_data_copy(&new->key, &elem->key);
+	if (is_map)
+		nft_data_copy(new->data, &elem->data);
+
+	bucket = nft_hash_data(&new->key, priv->hsize, set->klen);
+	hlist_add_head_rcu(&new->hnode, &priv->hash[bucket]);
+
+	return 0;
+}
+
+/*
+ * Remove an element from the set and free it.
+ *
+ * elem->cookie must point to the element, as set up by nft_hash_get().
+ */
+static void nft_hash_remove(const struct nft_set *set,
+			    const struct nft_set_elem *elem)
+{
+	struct nft_hash_elem *he = elem->cookie;
+
+	hlist_del_rcu(&he->hnode);
+	/*
+	 * The element was unlinked with the RCU list primitive, so readers
+	 * may still be traversing it. Wait for a grace period before the
+	 * memory is released to avoid a use-after-free.
+	 * NOTE(review): assumes this runs in a context that may sleep, like
+	 * nft_hash_insert() which allocates with GFP_KERNEL - confirm.
+	 */
+	synchronize_rcu();
+	kfree(he);
+}
+
+/*
+ * Find the element matching elem->key.
+ *
+ * On a match elem->cookie is pointed at the element, elem->flags is cleared
+ * and, for NFT_SET_MAP sets, the element data is copied to elem->data.
+ * Returns 0 on a match and -ENOENT otherwise.
+ */
+static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
+{
+	const struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *cur;
+	unsigned int bucket;
+
+	bucket = nft_hash_data(&elem->key, priv->hsize, set->klen);
+	hlist_for_each_entry(cur, &priv->hash[bucket], hnode) {
+		if (!nft_data_cmp(&cur->key, &elem->key, set->klen)) {
+			elem->cookie = cur;
+			elem->flags = 0;
+			if (set->flags & NFT_SET_MAP)
+				nft_data_copy(&elem->data, cur->data);
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Iterate over all elements of the set.
+ *
+ * The first iter->skip elements are only counted. For every later element a
+ * copy of its key (and data, for maps) is handed to iter->fn(); the walk
+ * stops as soon as the callback returns a negative value, which is left in
+ * iter->err. iter->count is advanced for each element that was passed over.
+ */
+static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
+			  struct nft_set_iter *iter)
+{
+	const struct nft_hash *priv = nft_set_priv(set);
+	const struct nft_hash_elem *cur;
+	struct nft_set_elem elem;
+	unsigned int bucket;
+
+	for (bucket = 0; bucket < priv->hsize; bucket++) {
+		hlist_for_each_entry(cur, &priv->hash[bucket], hnode) {
+			if (iter->count >= iter->skip) {
+				memcpy(&elem.key, &cur->key, sizeof(elem.key));
+				if (set->flags & NFT_SET_MAP)
+					memcpy(&elem.data, cur->data,
+					       sizeof(elem.data));
+				elem.flags = 0;
+
+				iter->err = iter->fn(ctx, set, iter, &elem);
+				if (iter->err < 0)
+					return;
+			}
+			iter->count++;
+		}
+	}
+}
+
+/* Size of the private area of a hash set; the attributes are not examined. */
+static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
+{
+	const unsigned int size = sizeof(struct nft_hash);
+
+	return size;
+}
+
+/*
+ * Set up the private data of a new hash set.
+ *
+ * Seeds the global hash random value on first use and allocates the bucket
+ * array. Returns 0 on success or -ENOMEM.
+ */
+static int nft_hash_init(const struct nft_set *set,
+			 const struct nlattr * const tb[])
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	unsigned int nbuckets, n;
+
+	if (unlikely(!nft_hash_rnd_initted)) {
+		get_random_bytes(&nft_hash_rnd, 4);
+		nft_hash_rnd_initted = true;
+	}
+
+	/*
+	 * Aim for a load factor of 0.75.
+	 * FIXME: temporarily broken until we have set descriptions, a fixed
+	 * element count of 100 is assumed.
+	 */
+	nbuckets = 100;
+	nbuckets = nbuckets * 4 / 3;
+
+	priv->hash = kcalloc(nbuckets, sizeof(struct hlist_head), GFP_KERNEL);
+	if (priv->hash == NULL)
+		return -ENOMEM;
+	priv->hsize = nbuckets;
+
+	for (n = 0; n < nbuckets; n++)
+		INIT_HLIST_HEAD(&priv->hash[n]);
+
+	return 0;
+}
+
+/* Unlink and destroy every element of the set, then free the bucket array. */
+static void nft_hash_destroy(const struct nft_set *set)
+{
+	const struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *he;
+	struct hlist_head *head;
+	unsigned int bucket;
+
+	for (bucket = 0; bucket < priv->hsize; bucket++) {
+		head = &priv->hash[bucket];
+
+		/* Pop elements off the front until the bucket is empty. */
+		while (!hlist_empty(head)) {
+			he = hlist_entry(head->first, struct nft_hash_elem,
+					 hnode);
+			hlist_del(&he->hnode);
+			nft_hash_elem_destroy(set, he);
+		}
+	}
+
+	kfree(priv->hash);
+}
+
+/* Set backend operations of the hash set; it advertises NFT_SET_MAP support. */
+static struct nft_set_ops nft_hash_ops __read_mostly = {
+	.owner		= THIS_MODULE,
+	.features	= NFT_SET_MAP,
+	.privsize	= nft_hash_privsize,
+	.init		= nft_hash_init,
+	.destroy	= nft_hash_destroy,
+	.insert		= nft_hash_insert,
+	.remove		= nft_hash_remove,
+	.get		= nft_hash_get,
+	.lookup		= nft_hash_lookup,
+	.walk		= nft_hash_walk,
+};
+
+/* Module entry point: register the hash set backend. */
+static int __init nft_hash_module_init(void)
+{
+	int err;
+
+	err = nft_register_set(&nft_hash_ops);
+	return err;
+}
+
+/* Module exit point: unregister the hash set backend. */
+static void __exit nft_hash_module_exit(void)
+{
+	nft_unregister_set(&nft_hash_ops);
+}
+
+module_init(nft_hash_module_init);
+module_exit(nft_hash_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
new file mode 100644
index 000000000000..f169501f1ad4
--- /dev/null
+++ b/net/netfilter/nft_immediate.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+/* Private data of an immediate expression. */
+struct nft_immediate_expr {
+	struct nft_data		data;	/* value loaded into the register */
+	enum nft_registers	dreg:8;	/* destination register */
+	u8			dlen;	/* data length as reported by nft_data_init() */
+};
+
+/* Load the expression's constant data into register priv->dreg. */
+static void nft_immediate_eval(const struct nft_expr *expr,
+			       struct nft_data data[NFT_REG_MAX + 1],
+			       const struct nft_pktinfo *pkt)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+	struct nft_data *dest = &data[priv->dreg];
+
+	nft_data_copy(dest, &priv->data);
+}
+
+/* Netlink attribute policy of the immediate expression. */
+static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = {
+	[NFTA_IMMEDIATE_DATA]	= { .type = NLA_NESTED },
+	[NFTA_IMMEDIATE_DREG]	= { .type = NLA_U32 },
+};
+
+/*
+ * Parse the attributes of an immediate expression.
+ *
+ * Both DREG and DATA are mandatory (-EINVAL otherwise). The data is
+ * initialised from the nested DATA attribute and then validated against the
+ * destination register; if that validation fails the data is released again.
+ * Returns 0 on success or a negative error from one of the helpers.
+ */
+static int nft_immediate_init(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nlattr * const tb[])
+{
+	struct nft_immediate_expr *priv = nft_expr_priv(expr);
+	struct nft_data_desc desc;
+	int ret;
+
+	if (tb[NFTA_IMMEDIATE_DREG] == NULL ||
+	    tb[NFTA_IMMEDIATE_DATA] == NULL)
+		return -EINVAL;
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG]));
+	ret = nft_validate_output_register(priv->dreg);
+	if (ret < 0)
+		return ret;
+
+	ret = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]);
+	if (ret < 0)
+		return ret;
+	priv->dlen = desc.len;
+
+	ret = nft_validate_data_load(ctx, priv->dreg, &priv->data, desc.type);
+	if (ret < 0) {
+		/* Undo nft_data_init() before reporting the failure. */
+		nft_data_uninit(&priv->data, desc.type);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Release the data held by an immediate expression. The data type is
+ * derived from the destination register via nft_dreg_to_type().
+ */
+static void nft_immediate_destroy(const struct nft_expr *expr)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+	/* Plain call: a void function must not return an expression. */
+	nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg));
+}
+
+/*
+ * Dump the destination register followed by the immediate data.
+ * Returns -1 when DREG cannot be added, otherwise the result of
+ * nft_data_dump().
+ */
+static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_IMMEDIATE_DREG, htonl(priv->dreg)))
+		return -1;
+
+	return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &priv->data,
+			     nft_dreg_to_type(priv->dreg), priv->dlen);
+}
+
+/*
+ * Validation hook: when the expression writes the verdict register, hand a
+ * pointer to its data back through *data. Always returns 0.
+ */
+static int nft_immediate_validate(const struct nft_ctx *ctx,
+				  const struct nft_expr *expr,
+				  const struct nft_data **data)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+	if (priv->dreg != NFT_REG_VERDICT)
+		return 0;
+
+	*data = &priv->data;
+	return 0;
+}
+
+/* Forward declaration: the ops table and the type refer to each other. */
+static struct nft_expr_type nft_imm_type;
+
+/* Callbacks and private data size of the immediate expression. */
+static const struct nft_expr_ops nft_imm_ops = {
+	.type		= &nft_imm_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
+	.init		= nft_immediate_init,
+	.destroy	= nft_immediate_destroy,
+	.validate	= nft_immediate_validate,
+	.eval		= nft_immediate_eval,
+	.dump		= nft_immediate_dump,
+};
+
+/* Expression type registered under the name "immediate". */
+static struct nft_expr_type nft_imm_type __read_mostly = {
+	.name		= "immediate",
+	.owner		= THIS_MODULE,
+	.ops		= &nft_imm_ops,
+	.policy		= nft_immediate_policy,
+	.maxattr	= NFTA_IMMEDIATE_MAX,
+};
+
+/* Register the "immediate" expression type; not static, called from outside this file. */
+int __init nft_immediate_module_init(void)
+{
+	int err;
+
+	err = nft_register_expr(&nft_imm_type);
+	return err;
+}
+
+/* Unregister the "immediate" expression type. */
+void nft_immediate_module_exit(void)
+{
+	nft_unregister_expr(&nft_imm_type);
+}
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
new file mode 100644
index 000000000000..85da5bd02f64
--- /dev/null
+++ b/net/netfilter/nft_limit.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/* Single lock serialising the token accounting of all limit expressions. */
+static DEFINE_SPINLOCK(limit_lock);
+
+/* Private data of a limit expression. */
+struct nft_limit {
+	u64		tokens;	/* matches still allowed in the current interval */
+	u64		rate;	/* tokens granted at every refill */
+	u64		unit;	/* interval length, multiplied by HZ to get jiffies */
+	unsigned long	stamp;	/* jiffies value at which tokens are refilled */
+};
+
+/*
+ * Rate limit evaluation.
+ *
+ * Once the refill time has been reached the tokens are reset to the
+ * configured rate and the next refill time is scheduled. A packet consumes
+ * one token; when none is left the verdict register is set to NFT_BREAK.
+ */
+static void nft_limit_eval(const struct nft_expr *expr,
+			   struct nft_data data[NFT_REG_MAX + 1],
+			   const struct nft_pktinfo *pkt)
+{
+	struct nft_limit *priv = nft_expr_priv(expr);
+	bool allowed;
+
+	spin_lock_bh(&limit_lock);
+
+	if (time_after_eq(jiffies, priv->stamp)) {
+		priv->tokens = priv->rate;
+		priv->stamp = jiffies + priv->unit * HZ;
+	}
+
+	allowed = priv->tokens >= 1;
+	if (allowed)
+		priv->tokens--;
+
+	spin_unlock_bh(&limit_lock);
+
+	if (!allowed)
+		data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/* Netlink attribute policy of the limit expression: both values are 64 bit. */
+static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
+	[NFTA_LIMIT_RATE]	= {
+		.type	= NLA_U64,
+	},
+	[NFTA_LIMIT_UNIT]	= {
+		.type	= NLA_U64,
+	},
+};
+
+/*
+ * Parse the attributes of a limit expression.
+ *
+ * RATE and UNIT are mandatory (-EINVAL otherwise). The expression starts
+ * with a full set of tokens and the first refill one interval from now.
+ */
+static int nft_limit_init(const struct nft_ctx *ctx,
+			  const struct nft_expr *expr,
+			  const struct nlattr * const tb[])
+{
+	struct nft_limit *priv = nft_expr_priv(expr);
+	const struct nlattr *rate = tb[NFTA_LIMIT_RATE];
+	const struct nlattr *unit = tb[NFTA_LIMIT_UNIT];
+
+	if (rate == NULL || unit == NULL)
+		return -EINVAL;
+
+	priv->rate   = be64_to_cpu(nla_get_be64(rate));
+	priv->unit   = be64_to_cpu(nla_get_be64(unit));
+	priv->stamp  = jiffies + priv->unit * HZ;
+	priv->tokens = priv->rate;
+
+	return 0;
+}
+
+/*
+ * Dump RATE and UNIT into skb.
+ * Returns 0 on success, -1 when an attribute cannot be added.
+ */
+static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_limit *priv = nft_expr_priv(expr);
+
+	if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate)) ||
+	    nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit)))
+		return -1;
+
+	return 0;
+}
+
+/* Forward declaration: the ops table and the type refer to each other. */
+static struct nft_expr_type nft_limit_type;
+
+/* Callbacks and private data size of the limit expression. */
+static const struct nft_expr_ops nft_limit_ops = {
+	.type		= &nft_limit_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_limit)),
+	.init		= nft_limit_init,
+	.eval		= nft_limit_eval,
+	.dump		= nft_limit_dump,
+};
+
+/* Expression type registered under the name "limit". */
+static struct nft_expr_type nft_limit_type __read_mostly = {
+	.name		= "limit",
+	.owner		= THIS_MODULE,
+	.ops		= &nft_limit_ops,
+	.policy		= nft_limit_policy,
+	.maxattr	= NFTA_LIMIT_MAX,
+};
+
+/* Module entry point: register the "limit" expression type. */
+static int __init nft_limit_module_init(void)
+{
+	int err;
+
+	err = nft_register_expr(&nft_limit_type);
+	return err;
+}
+
+/* Module exit point: unregister the "limit" expression type. */
+static void __exit nft_limit_module_exit(void)
+{
+	nft_unregister_expr(&nft_limit_type);
+}
+
+module_init(nft_limit_module_init);
+module_exit(nft_limit_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("limit");
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
new file mode 100644
index 000000000000..57cad072a13e
--- /dev/null
+++ b/net/netfilter/nft_log.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_log.h>
+#include <linux/netdevice.h>
+
+/*
+ * Shared empty prefix used when no NFTA_LOG_PREFIX was given. Its address
+ * doubles as the marker for "no allocated prefix" in destroy and dump.
+ */
+static const char *nft_log_null_prefix = "";
+
+/* Private data of a log expression. */
+struct nft_log {
+	struct nf_loginfo	loginfo;	/* parameters handed to nf_log_packet() */
+	char			*prefix;	/* allocated prefix or nft_log_null_prefix */
+	int			family;		/* family taken from ctx->afi at init */
+};
+
+/*
+ * Log the packet through nf_log_packet() using the configured loginfo and
+ * prefix. The network namespace is taken from the input device, or from the
+ * output device when there is no input device.
+ */
+static void nft_log_eval(const struct nft_expr *expr,
+			 struct nft_data data[NFT_REG_MAX + 1],
+			 const struct nft_pktinfo *pkt)
+{
+	const struct nft_log *priv = nft_expr_priv(expr);
+	const struct net_device *dev = pkt->in ? pkt->in : pkt->out;
+	struct net *net = dev_net(dev);
+
+	nf_log_packet(net, priv->family, pkt->hooknum, pkt->skb, pkt->in,
+		      pkt->out, &priv->loginfo, "%s", priv->prefix);
+}
+
+/* Netlink attribute policy of the log expression. */
+static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
+	[NFTA_LOG_PREFIX]	= { .type = NLA_STRING },
+	[NFTA_LOG_GROUP]	= { .type = NLA_U16 },
+	[NFTA_LOG_SNAPLEN]	= { .type = NLA_U32 },
+	[NFTA_LOG_QTHRESHOLD]	= { .type = NLA_U16 },
+};
+
+/*
+ * Parse the attributes of a log expression; all of them are optional.
+ *
+ * The prefix is copied into a freshly allocated, NUL-terminated buffer, or
+ * the shared empty prefix is used when none was given. Group, snaplen and
+ * queue threshold are stored only when the respective attribute is present.
+ * Returns 0 on success or -ENOMEM.
+ */
+static int nft_log_init(const struct nft_ctx *ctx,
+			const struct nft_expr *expr,
+			const struct nlattr * const tb[])
+{
+	struct nft_log *priv = nft_expr_priv(expr);
+	struct nf_loginfo *li = &priv->loginfo;
+	const struct nlattr *pfx = tb[NFTA_LOG_PREFIX];
+
+	priv->family = ctx->afi->family;
+
+	if (pfx == NULL) {
+		priv->prefix = (char *)nft_log_null_prefix;
+	} else {
+		/* One extra byte for the terminating NUL. */
+		priv->prefix = kmalloc(nla_len(pfx) + 1, GFP_KERNEL);
+		if (priv->prefix == NULL)
+			return -ENOMEM;
+		nla_strlcpy(priv->prefix, pfx, nla_len(pfx) + 1);
+	}
+
+	li->type = NF_LOG_TYPE_ULOG;
+
+	if (tb[NFTA_LOG_GROUP] != NULL)
+		li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP]));
+	if (tb[NFTA_LOG_SNAPLEN] != NULL)
+		li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN]));
+	if (tb[NFTA_LOG_QTHRESHOLD] != NULL)
+		li->u.ulog.qthreshold =
+			ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD]));
+
+	return 0;
+}
+
+/* Free the prefix unless it is the shared, statically allocated empty one. */
+static void nft_log_destroy(const struct nft_expr *expr)
+{
+	struct nft_log *priv = nft_expr_priv(expr);
+
+	if (priv->prefix == nft_log_null_prefix)
+		return;
+
+	kfree(priv->prefix);
+}
+
+/*
+ * Dump the log configuration into skb. The prefix is emitted only when one
+ * was configured; group, snaplen and queue threshold only when non-zero.
+ * Returns 0 on success, -1 when an attribute cannot be added.
+ */
+static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_log *priv = nft_expr_priv(expr);
+	const struct nf_loginfo *li = &priv->loginfo;
+
+	if (priv->prefix != nft_log_null_prefix &&
+	    nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix))
+		return -1;
+
+	if (li->u.ulog.group &&
+	    nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group)))
+		return -1;
+
+	if (li->u.ulog.copy_len &&
+	    nla_put_be32(skb, NFTA_LOG_SNAPLEN, htonl(li->u.ulog.copy_len)))
+		return -1;
+
+	if (li->u.ulog.qthreshold &&
+	    nla_put_be16(skb, NFTA_LOG_QTHRESHOLD,
+			 htons(li->u.ulog.qthreshold)))
+		return -1;
+
+	return 0;
+}
+
+/* Forward declaration: the ops table and the type refer to each other. */
+static struct nft_expr_type nft_log_type;
+
+/* Callbacks and private data size of the log expression. */
+static const struct nft_expr_ops nft_log_ops = {
+	.type		= &nft_log_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_log)),
+	.init		= nft_log_init,
+	.destroy	= nft_log_destroy,
+	.eval		= nft_log_eval,
+	.dump		= nft_log_dump,
+};
+
+/* Expression type registered under the name "log". */
+static struct nft_expr_type nft_log_type __read_mostly = {
+	.name		= "log",
+	.owner		= THIS_MODULE,
+	.ops		= &nft_log_ops,
+	.policy		= nft_log_policy,
+	.maxattr	= NFTA_LOG_MAX,
+};
+
+/* Module entry point: register the "log" expression type. */
+static int __init nft_log_module_init(void)
+{
+	int err;
+
+	err = nft_register_expr(&nft_log_type);
+	return err;
+}
+
+/* Module exit point: unregister the "log" expression type. */
+static void __exit nft_log_module_exit(void)
+{
+	nft_unregister_expr(&nft_log_type);
+}
+
+module_init(nft_log_module_init);
+module_exit(nft_log_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("log");
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
new file mode 100644
index 000000000000..8a6116b75b5a
--- /dev/null
+++ b/net/netfilter/nft_lookup.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/* Private data of a lookup expression. */
+struct nft_lookup {
+	struct nft_set		*set;		/* set the expression is bound to */
+	enum nft_registers	sreg:8;		/* register holding the lookup key */
+	enum nft_registers	dreg:8;		/* register receiving map data */
+	struct nft_set_binding	binding;	/* binding of this expression to the set */
+};
+
+/*
+ * Look up the key held in register priv->sreg in the bound set, passing
+ * register priv->dreg as the destination for the set backend. When the
+ * lookup fails the verdict register is set to NFT_BREAK.
+ */
+static void nft_lookup_eval(const struct nft_expr *expr,
+			    struct nft_data data[NFT_REG_MAX + 1],
+			    const struct nft_pktinfo *pkt)
+{
+	const struct nft_lookup *priv = nft_expr_priv(expr);
+	const struct nft_set *set = priv->set;
+
+	if (!set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg]))
+		data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/* Netlink attribute policy of the lookup expression. */
+static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
+	[NFTA_LOOKUP_SET]	= {
+		.type	= NLA_STRING,
+	},
+	[NFTA_LOOKUP_SREG]	= {
+		.type	= NLA_U32,
+	},
+	[NFTA_LOOKUP_DREG]	= {
+		.type	= NLA_U32,
+	},
+};
+
+/*
+ * Parse the attributes of a lookup expression and bind it to its set.
+ *
+ * SET and SREG are mandatory. DREG must be given if and only if the set is
+ * a map, and the verdict register may be used as destination exactly when
+ * the set's data type is NFT_DATA_VERDICT. Returns 0 on success, -EINVAL on
+ * inconsistent attributes, or the error of a helper.
+ */
+static int nft_lookup_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
+{
+	struct nft_lookup *priv = nft_expr_priv(expr);
+	struct nft_set *set;
+	bool is_map;
+	int ret;
+
+	if (tb[NFTA_LOOKUP_SET] == NULL ||
+	    tb[NFTA_LOOKUP_SREG] == NULL)
+		return -EINVAL;
+
+	set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]);
+	if (IS_ERR(set))
+		return PTR_ERR(set);
+
+	priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG]));
+	ret = nft_validate_input_register(priv->sreg);
+	if (ret < 0)
+		return ret;
+
+	is_map = set->flags & NFT_SET_MAP;
+	if (tb[NFTA_LOOKUP_DREG] == NULL) {
+		/* A map needs a destination register. */
+		if (is_map)
+			return -EINVAL;
+	} else {
+		if (!is_map)
+			return -EINVAL;
+
+		priv->dreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_DREG]));
+		ret = nft_validate_output_register(priv->dreg);
+		if (ret < 0)
+			return ret;
+
+		/* Verdict register and verdict data must go together. */
+		if ((priv->dreg == NFT_REG_VERDICT) !=
+		    (set->dtype == NFT_DATA_VERDICT))
+			return -EINVAL;
+	}
+
+	ret = nf_tables_bind_set(ctx, set, &priv->binding);
+	if (ret < 0)
+		return ret;
+
+	priv->set = set;
+	return 0;
+}
+
+/* Drop the binding between this expression and its set (no context passed). */
+static void nft_lookup_destroy(const struct nft_expr *expr)
+{
+	struct nft_lookup *priv = nft_expr_priv(expr);
+	struct nft_set *set = priv->set;
+
+	nf_tables_unbind_set(NULL, set, &priv->binding);
+}
+
+/*
+ * Dump the set name and source register, plus the destination register when
+ * the set is a map. Returns 0 on success, -1 when an attribute cannot be
+ * added.
+ */
+static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_lookup *priv = nft_expr_priv(expr);
+
+	if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name) ||
+	    nla_put_be32(skb, NFTA_LOOKUP_SREG, htonl(priv->sreg)))
+		return -1;
+
+	if ((priv->set->flags & NFT_SET_MAP) &&
+	    nla_put_be32(skb, NFTA_LOOKUP_DREG, htonl(priv->dreg)))
+		return -1;
+
+	return 0;
+}
+
+/* Forward declaration: the ops table and the type refer to each other. */
+static struct nft_expr_type nft_lookup_type;
+
+/* Callbacks and private data size of the lookup expression. */
+static const struct nft_expr_ops nft_lookup_ops = {
+	.type		= &nft_lookup_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
+	.init		= nft_lookup_init,
+	.destroy	= nft_lookup_destroy,
+	.eval		= nft_lookup_eval,
+	.dump		= nft_lookup_dump,
+};
+
+/* Expression type registered under the name "lookup". */
+static struct nft_expr_type nft_lookup_type __read_mostly = {
+	.name		= "lookup",
+	.owner		= THIS_MODULE,
+	.ops		= &nft_lookup_ops,
+	.policy		= nft_lookup_policy,
+	.maxattr	= NFTA_LOOKUP_MAX,
+};
+
+/* Register the "lookup" expression type; not static, called from outside this file. */
+int __init nft_lookup_module_init(void)
+{
+	int err;
+
+	err = nft_register_expr(&nft_lookup_type);
+	return err;
+}
+
+/* Unregister the "lookup" expression type. */
+void nft_lookup_module_exit(void)
+{
+	nft_unregister_expr(&nft_lookup_type);
+}
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
new file mode 100644
index 000000000000..8c28220a90b3
--- /dev/null
+++ b/net/netfilter/nft_meta.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
+#include <net/netfilter/nf_tables.h>
+
+/* Private data of a meta expression. */
+struct nft_meta {
+	enum nft_meta_keys	key:8;	/* which piece of metadata to load */
+	enum nft_registers	dreg:8;	/* destination register */
+};
+
+/*
+ * Load the packet metadata selected by priv->key into register priv->dreg.
+ *
+ * Keys that depend on something the packet does not have (no input/output
+ * device, no usable socket, no dst entry) set the verdict register to
+ * NFT_BREAK instead, as does a key that is not handled here.
+ */
+static void nft_meta_eval(const struct nft_expr *expr,
+			  struct nft_data data[NFT_REG_MAX + 1],
+			  const struct nft_pktinfo *pkt)
+{
+	const struct nft_meta *priv = nft_expr_priv(expr);
+	const struct sk_buff *skb = pkt->skb;
+	const struct net_device *indev = pkt->in;
+	const struct net_device *outdev = pkt->out;
+	struct nft_data *reg = &data[priv->dreg];
+	struct sock *sk = skb->sk;
+
+	switch (priv->key) {
+	case NFT_META_LEN:
+		reg->data[0] = skb->len;
+		break;
+	case NFT_META_PROTOCOL:
+		/* Stored as is, i.e. as a 16 bit big endian value. */
+		*(__be16 *)reg->data = skb->protocol;
+		break;
+	case NFT_META_PRIORITY:
+		reg->data[0] = skb->priority;
+		break;
+	case NFT_META_MARK:
+		reg->data[0] = skb->mark;
+		break;
+	case NFT_META_IIF:
+		if (indev == NULL)
+			goto err;
+		reg->data[0] = indev->ifindex;
+		break;
+	case NFT_META_OIF:
+		if (outdev == NULL)
+			goto err;
+		reg->data[0] = outdev->ifindex;
+		break;
+	case NFT_META_IIFNAME:
+		if (indev == NULL)
+			goto err;
+		strncpy((char *)reg->data, indev->name, sizeof(reg->data));
+		break;
+	case NFT_META_OIFNAME:
+		if (outdev == NULL)
+			goto err;
+		strncpy((char *)reg->data, outdev->name, sizeof(reg->data));
+		break;
+	case NFT_META_IIFTYPE:
+		if (indev == NULL)
+			goto err;
+		*(u16 *)reg->data = indev->type;
+		break;
+	case NFT_META_OIFTYPE:
+		if (outdev == NULL)
+			goto err;
+		*(u16 *)reg->data = outdev->type;
+		break;
+	case NFT_META_SKUID:
+		if (sk == NULL || sk->sk_state == TCP_TIME_WAIT)
+			goto err;
+
+		/* sk_socket and its file are inspected under the callback lock. */
+		read_lock_bh(&sk->sk_callback_lock);
+		if (sk->sk_socket == NULL ||
+		    sk->sk_socket->file == NULL) {
+			read_unlock_bh(&sk->sk_callback_lock);
+			goto err;
+		}
+
+		reg->data[0] =
+			from_kuid_munged(&init_user_ns,
+					 sk->sk_socket->file->f_cred->fsuid);
+		read_unlock_bh(&sk->sk_callback_lock);
+		break;
+	case NFT_META_SKGID:
+		if (sk == NULL || sk->sk_state == TCP_TIME_WAIT)
+			goto err;
+
+		read_lock_bh(&sk->sk_callback_lock);
+		if (sk->sk_socket == NULL ||
+		    sk->sk_socket->file == NULL) {
+			read_unlock_bh(&sk->sk_callback_lock);
+			goto err;
+		}
+
+		reg->data[0] =
+			from_kgid_munged(&init_user_ns,
+					 sk->sk_socket->file->f_cred->fsgid);
+		read_unlock_bh(&sk->sk_callback_lock);
+		break;
+#ifdef CONFIG_NET_CLS_ROUTE
+	case NFT_META_RTCLASSID: {
+		const struct dst_entry *route = skb_dst(skb);
+
+		if (route == NULL)
+			goto err;
+		reg->data[0] = route->tclassid;
+		break;
+	}
+#endif
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+		reg->data[0] = skb->secmark;
+		break;
+#endif
+	default:
+		WARN_ON(1);
+		goto err;
+	}
+	return;
+
+err:
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/* Netlink attribute policy of the meta expression. */
+static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+	[NFTA_META_KEY]		= { .type = NLA_U32 },
+	[NFTA_META_DREG]	= { .type = NLA_U32 },
+};
+
+/*
+ * Parse the attributes of a meta expression.
+ *
+ * KEY and DREG are mandatory (-EINVAL otherwise). Keys that this build does
+ * not handle are rejected with -EOPNOTSUPP. Returns 0 on success or the
+ * error of the register validation helpers.
+ */
+static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			 const struct nlattr * const tb[])
+{
+	struct nft_meta *priv = nft_expr_priv(expr);
+	int ret;
+
+	if (tb[NFTA_META_DREG] == NULL ||
+	    tb[NFTA_META_KEY] == NULL)
+		return -EINVAL;
+
+	priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
+
+	switch (priv->key) {
+	/* Keys that are always available. */
+	case NFT_META_LEN:
+	case NFT_META_PROTOCOL:
+	case NFT_META_PRIORITY:
+	case NFT_META_MARK:
+	case NFT_META_IIF:
+	case NFT_META_OIF:
+	case NFT_META_IIFNAME:
+	case NFT_META_OIFNAME:
+	case NFT_META_IIFTYPE:
+	case NFT_META_OIFTYPE:
+	case NFT_META_SKUID:
+	case NFT_META_SKGID:
+	/* Keys that depend on the kernel configuration. */
+#ifdef CONFIG_NET_CLS_ROUTE
+	case NFT_META_RTCLASSID:
+#endif
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+#endif
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
+	ret = nft_validate_output_register(priv->dreg);
+	if (ret < 0)
+		return ret;
+
+	ret = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+	return ret;
+}
+
+/*
+ * Dump DREG and KEY into skb.
+ * Returns 0 on success, -1 when an attribute cannot be added.
+ */
+static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_meta *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg)) ||
+	    nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
+		return -1;
+
+	return 0;
+}
+
+/* Forward declaration: the ops table and the type refer to each other. */
+static struct nft_expr_type nft_meta_type;
+
+/* Callbacks and private data size of the meta expression. */
+static const struct nft_expr_ops nft_meta_ops = {
+	.type		= &nft_meta_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+	.init		= nft_meta_init,
+	.eval		= nft_meta_eval,
+	.dump		= nft_meta_dump,
+};
+
+/* Expression type registered under the name "meta". */
+static struct nft_expr_type nft_meta_type __read_mostly = {
+	.name		= "meta",
+	.owner		= THIS_MODULE,
+	.ops		= &nft_meta_ops,
+	.policy		= nft_meta_policy,
+	.maxattr	= NFTA_META_MAX,
+};
+
+/* Module entry point: register the "meta" expression type. */
+static int __init nft_meta_module_init(void)
+{
+	int err;
+
+	err = nft_register_expr(&nft_meta_type);
+	return err;
+}
+
+/* Module exit point: unregister the "meta" expression type. */
+static void __exit nft_meta_module_exit(void)
+{
+	nft_unregister_expr(&nft_meta_type);
+}
+
+module_init(nft_meta_module_init);
+module_exit(nft_meta_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("meta");
diff --git a/net/netfilter/nft_meta_target.c b/net/netfilter/nft_meta_target.c
new file mode 100644
index 000000000000..71177df75ffb
--- /dev/null
+++ b/net/netfilter/nft_meta_target.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/* Private data of a meta target expression. */
+struct nft_meta {
+ enum nft_meta_keys key; /* packet metadata field written by nft_meta_eval() */
+};
+
+/*
+ * Write the first 32 bit word of @data into the packet metadata field
+ * selected by the expression key: mark, priority, nf_trace or (when
+ * CONFIG_NETWORK_SECMARK is enabled) secmark.
+ *
+ * @nfres is not used by this function.
+ */
+static void nft_meta_eval(const struct nft_expr *expr,
+			  struct nft_data *nfres,
+			  struct nft_data *data,
+			  const struct nft_pktinfo *pkt)
+{
+	const struct nft_meta *priv = nft_expr_priv(expr);
+	struct sk_buff *skb = pkt->skb;
+	const u32 value = data->data[0];
+
+	switch (priv->key) {
+	case NFT_META_MARK:
+		skb->mark = value;
+		return;
+	case NFT_META_PRIORITY:
+		skb->priority = value;
+		return;
+	case NFT_META_NFTRACE:
+		skb->nf_trace = value;
+		return;
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+		skb->secmark = value;
+		return;
+#endif
+	default:
+		/* Any other key is unexpected here: warn and leave the skb alone. */
+		WARN_ON(1);
+		return;
+	}
+}
+
+/* Netlink attribute policy: the meta key is carried as a 32 bit attribute. */
+static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
+ [NFTA_META_KEY] = { .type = NLA_U32 },
+};
+
+/*
+ * Initialize a meta target expression from its netlink attributes.
+ *
+ * NFTA_META_KEY is mandatory and has to name one of the writable fields:
+ * mark, priority, nftrace or (with CONFIG_NETWORK_SECMARK) secmark.
+ *
+ * Returns 0 on success, -EINVAL if the key is missing or not supported.
+ */
+static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[])
+{
+	struct nft_meta *priv = nft_expr_priv(expr);
+	const struct nlattr *attr = tb[NFTA_META_KEY];
+
+	if (!attr)
+		return -EINVAL;
+
+	priv->key = ntohl(nla_get_be32(attr));
+	switch (priv->key) {
+	case NFT_META_MARK:
+	case NFT_META_PRIORITY:
+	case NFT_META_NFTRACE:
+#ifdef CONFIG_NETWORK_SECMARK
+	case NFT_META_SECMARK:
+#endif
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Dump the configuration of a meta target expression, i.e. the meta key,
+ * into @skb.
+ *
+ * Returns 0 on success or -1 if the attribute could not be added.
+ */
+static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_meta *meta = nft_expr_priv(expr);
+
+	/*
+	 * NLA_PUT_BE32() hid a jump to nla_put_failure inside a macro and is
+	 * no longer provided by the netlink headers. Call nla_put_be32() and
+	 * test its result explicitly, as the other dump callbacks do.
+	 */
+	if (nla_put_be32(skb, NFTA_META_KEY, htonl(meta->key)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+/* Expression operations of the meta target, registered as "meta". */
+static struct nft_expr_ops meta_target __read_mostly = {
+	.name		= "meta",
+	.owner		= THIS_MODULE,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_meta)),
+	.policy		= nft_meta_policy,
+	.maxattr	= NFTA_META_MAX,
+	.init		= nft_meta_init,
+	.eval		= nft_meta_eval,
+	.dump		= nft_meta_dump,
+};
+
+/*
+ * Module init hook: registers the meta target expression and returns
+ * whatever nft_register_expr() reports.
+ */
+static int __init nft_meta_target_init(void)
+{
+ return nft_register_expr(&meta_target);
+}
+
+/* Module exit hook: unregisters the meta target expression again. */
+static void __exit nft_meta_target_exit(void)
+{
+ nft_unregister_expr(&meta_target);
+}
+
+module_init(nft_meta_target_init);
+module_exit(nft_meta_target_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_EXPR("meta");
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
new file mode 100644
index 000000000000..b0b87b2d2411
--- /dev/null
+++ b/net/netfilter/nft_nat.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ * Copyright (c) 2012 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/string.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/ip.h>
+
+/*
+ * Private data of a NAT expression. nft_nat_eval() treats a value of 0 in
+ * sreg_addr_min / sreg_proto_min as "no address / no protocol range given".
+ */
+struct nft_nat {
+ enum nft_registers sreg_addr_min:8; /* register holding the lowest address */
+ enum nft_registers sreg_addr_max:8; /* register holding the highest address */
+ enum nft_registers sreg_proto_min:8; /* register holding the lowest protocol value */
+ enum nft_registers sreg_proto_max:8; /* register holding the highest protocol value */
+ int family; /* AF_INET or AF_INET6, checked by nft_nat_init() */
+ enum nf_nat_manip_type type; /* NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST */
+};
+
+/*
+ * Build a NAT range from the configured source registers and apply it to
+ * the conntrack entry of the packet.
+ *
+ * An address range is only filled in when sreg_addr_min is set, a protocol
+ * range only when sreg_proto_min is set. The result of nf_nat_setup_info()
+ * is stored in the verdict register.
+ */
+static void nft_nat_eval(const struct nft_expr *expr,
+			 struct nft_data data[NFT_REG_MAX + 1],
+			 const struct nft_pktinfo *pkt)
+{
+	const struct nft_nat *priv = nft_expr_priv(expr);
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo);
+	struct nf_nat_range range;
+
+	memset(&range, 0, sizeof(range));
+	if (priv->sreg_addr_min) {
+		if (priv->family == AF_INET) {
+			range.min_addr.ip = data[priv->sreg_addr_min].data[0];
+			range.max_addr.ip = data[priv->sreg_addr_max].data[0];
+		} else {
+			/*
+			 * Size the copy by the destination field rather than
+			 * by struct nft_data, so it can never write past the
+			 * IPv6 address even if the two sizes diverge.
+			 * NOTE(review): assumes a register holds at least a
+			 * full IPv6 address - confirm against struct nft_data.
+			 */
+			memcpy(range.min_addr.ip6,
+			       data[priv->sreg_addr_min].data,
+			       sizeof(range.min_addr.ip6));
+			memcpy(range.max_addr.ip6,
+			       data[priv->sreg_addr_max].data,
+			       sizeof(range.max_addr.ip6));
+		}
+		range.flags |= NF_NAT_RANGE_MAP_IPS;
+	}
+
+	if (priv->sreg_proto_min) {
+		range.min_proto.all = data[priv->sreg_proto_min].data[0];
+		range.max_proto.all = data[priv->sreg_proto_max].data[0];
+		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+	}
+
+	data[NFT_REG_VERDICT].verdict =
+		nf_nat_setup_info(ct, &range, priv->type);
+}
+
+/* Netlink attribute policy: every NAT attribute is a 32 bit value. */
+static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
+ [NFTA_NAT_TYPE] = { .type = NLA_U32 },
+ [NFTA_NAT_FAMILY] = { .type = NLA_U32 },
+ [NFTA_NAT_REG_ADDR_MIN] = { .type = NLA_U32 },
+ [NFTA_NAT_REG_ADDR_MAX] = { .type = NLA_U32 },
+ [NFTA_NAT_REG_PROTO_MIN] = { .type = NLA_U32 },
+ [NFTA_NAT_REG_PROTO_MAX] = { .type = NLA_U32 },
+};
+
+/*
+ * Read a source register number from @attr into @reg and validate it.
+ *
+ * The check runs on the full 32 bit value. The caller narrows it into an
+ * 8 bit bitfield only after it passed, so a value like 0x101 cannot be
+ * truncated to register 1 and slip through validation.
+ * NOTE(review): relies on nft_validate_input_register() rejecting values
+ * above the highest register - confirm.
+ */
+static int nft_nat_parse_sreg(const struct nlattr *attr, u32 *reg)
+{
+	*reg = ntohl(nla_get_be32(attr));
+	return nft_validate_input_register(*reg);
+}
+
+/*
+ * Initialize a NAT expression from its netlink attributes.
+ *
+ * NFTA_NAT_TYPE (SNAT or DNAT) and NFTA_NAT_FAMILY (AF_INET or AF_INET6)
+ * are mandatory. The address and protocol registers are optional; a
+ * missing *_MAX register defaults to the matching *_MIN register.
+ *
+ * Returns 0 on success, -EINVAL for a missing or unsupported type or
+ * family, or the error reported by nft_validate_input_register().
+ */
+static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			const struct nlattr * const tb[])
+{
+	struct nft_nat *priv = nft_expr_priv(expr);
+	u32 reg;
+	int err;
+
+	if (tb[NFTA_NAT_TYPE] == NULL)
+		return -EINVAL;
+
+	switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) {
+	case NFT_NAT_SNAT:
+		priv->type = NF_NAT_MANIP_SRC;
+		break;
+	case NFT_NAT_DNAT:
+		priv->type = NF_NAT_MANIP_DST;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (tb[NFTA_NAT_FAMILY] == NULL)
+		return -EINVAL;
+
+	priv->family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
+	if (priv->family != AF_INET && priv->family != AF_INET6)
+		return -EINVAL;
+
+	if (tb[NFTA_NAT_REG_ADDR_MIN]) {
+		err = nft_nat_parse_sreg(tb[NFTA_NAT_REG_ADDR_MIN], &reg);
+		if (err < 0)
+			return err;
+		priv->sreg_addr_min = reg;
+	}
+
+	if (tb[NFTA_NAT_REG_ADDR_MAX]) {
+		err = nft_nat_parse_sreg(tb[NFTA_NAT_REG_ADDR_MAX], &reg);
+		if (err < 0)
+			return err;
+		priv->sreg_addr_max = reg;
+	} else {
+		priv->sreg_addr_max = priv->sreg_addr_min;
+	}
+
+	if (tb[NFTA_NAT_REG_PROTO_MIN]) {
+		err = nft_nat_parse_sreg(tb[NFTA_NAT_REG_PROTO_MIN], &reg);
+		if (err < 0)
+			return err;
+		priv->sreg_proto_min = reg;
+	}
+
+	if (tb[NFTA_NAT_REG_PROTO_MAX]) {
+		err = nft_nat_parse_sreg(tb[NFTA_NAT_REG_PROTO_MAX], &reg);
+		if (err < 0)
+			return err;
+		priv->sreg_proto_max = reg;
+	} else {
+		priv->sreg_proto_max = priv->sreg_proto_min;
+	}
+
+	return 0;
+}
+
+/*
+ * Dump the configuration of a NAT expression into @skb.
+ *
+ * The NAT type and the address family are always emitted. The address and
+ * protocol register pairs are only emitted when they are configured:
+ * nft_nat_eval() treats a min register of 0 as "not set", so dumping it
+ * would describe a register the user never supplied.
+ *
+ * Returns 0 on success or -1 if an attribute could not be added.
+ */
+static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_nat *priv = nft_expr_priv(expr);
+
+	switch (priv->type) {
+	case NF_NAT_MANIP_SRC:
+		if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT)))
+			goto nla_put_failure;
+		break;
+	case NF_NAT_MANIP_DST:
+		if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT)))
+			goto nla_put_failure;
+		break;
+	}
+
+	if (nla_put_be32(skb, NFTA_NAT_FAMILY, htonl(priv->family)))
+		goto nla_put_failure;
+
+	if (priv->sreg_addr_min) {
+		if (nla_put_be32(skb, NFTA_NAT_REG_ADDR_MIN,
+				 htonl(priv->sreg_addr_min)) ||
+		    nla_put_be32(skb, NFTA_NAT_REG_ADDR_MAX,
+				 htonl(priv->sreg_addr_max)))
+			goto nla_put_failure;
+	}
+
+	if (priv->sreg_proto_min) {
+		if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MIN,
+				 htonl(priv->sreg_proto_min)) ||
+		    nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX,
+				 htonl(priv->sreg_proto_max)))
+			goto nla_put_failure;
+	}
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+/* Forward declaration: the ops and the type below reference each other. */
+static struct nft_expr_type nft_nat_type;
+
+/* Callbacks and private data size of the "nat" expression. */
+static const struct nft_expr_ops nft_nat_ops = {
+	.type		= &nft_nat_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_nat)),
+	.init		= nft_nat_init,
+	.eval		= nft_nat_eval,
+	.dump		= nft_nat_dump,
+};
+
+/* Expression type handed to nft_register_expr() under the name "nat". */
+static struct nft_expr_type nft_nat_type __read_mostly = {
+	.owner		= THIS_MODULE,
+	.name		= "nat",
+	.policy		= nft_nat_policy,
+	.maxattr	= NFTA_NAT_MAX,
+	.ops		= &nft_nat_ops,
+};
+
+/*
+ * Module init hook: registers the "nat" expression type.
+ *
+ * Returns the negative error from nft_register_expr(), otherwise 0.
+ */
+static int __init nft_nat_module_init(void)
+{
+	const int ret = nft_register_expr(&nft_nat_type);
+
+	return ret < 0 ? ret : 0;
+}
+
+/* Module exit hook: unregisters the "nat" expression type again. */
+static void __exit nft_nat_module_exit(void)
+{
+ nft_unregister_expr(&nft_nat_type);
+}
+
+module_init(nft_nat_module_init);
+module_exit(nft_nat_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
+MODULE_ALIAS_NFT_EXPR("nat");
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
new file mode 100644
index 000000000000..a2aeb318678f
--- /dev/null
+++ b/net/netfilter/nft_payload.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * Copy priv->len bytes of packet data into the destination register.
+ *
+ * The read position is priv->offset bytes past the start of the link
+ * layer, network or transport header, depending on priv->base. When the
+ * link layer header is requested but was never set, or when the bytes
+ * cannot be copied out of the skb, the verdict register is set to
+ * NFT_BREAK instead.
+ */
+static void nft_payload_eval(const struct nft_expr *expr,
+			     struct nft_data data[NFT_REG_MAX + 1],
+			     const struct nft_pktinfo *pkt)
+{
+	const struct nft_payload *payload = nft_expr_priv(expr);
+	const struct sk_buff *skb = pkt->skb;
+	int start;
+
+	switch (payload->base) {
+	case NFT_PAYLOAD_LL_HEADER:
+		if (!skb_mac_header_was_set(skb)) {
+			data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+			return;
+		}
+		start = skb_mac_header(skb) - skb->data;
+		break;
+	case NFT_PAYLOAD_NETWORK_HEADER:
+		start = skb_network_offset(skb);
+		break;
+	case NFT_PAYLOAD_TRANSPORT_HEADER:
+		start = pkt->xt.thoff;
+		break;
+	default:
+		BUG();
+	}
+	start += payload->offset;
+
+	if (skb_copy_bits(skb, start, data[payload->dreg].data,
+			  payload->len) < 0)
+		data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+/* Netlink attribute policy: every payload attribute is a 32 bit value. */
+static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
+ [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 },
+};
+
+/*
+ * Initialize a payload expression from its netlink attributes.
+ *
+ * All four attributes are read without a NULL check here;
+ * nft_payload_select_ops() checks for their presence.
+ *
+ * Returns the error from nft_validate_output_register() if the destination
+ * register is rejected, otherwise the result of nft_validate_data_load().
+ */
+static int nft_payload_init(const struct nft_ctx *ctx,
+			    const struct nft_expr *expr,
+			    const struct nlattr * const tb[])
+{
+	struct nft_payload *payload = nft_expr_priv(expr);
+	int ret;
+
+	payload->dreg   = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG]));
+	payload->base   = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+	payload->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+	payload->len    = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+
+	ret = nft_validate_output_register(payload->dreg);
+	if (ret < 0)
+		return ret;
+
+	return nft_validate_data_load(ctx, payload->dreg, NULL,
+				      NFT_DATA_VALUE);
+}
+
+/*
+ * Dump the configuration of a payload expression into @skb, in the order
+ * destination register, base, offset, length.
+ *
+ * Returns 0 on success and -1 as soon as one attribute cannot be added.
+ */
+static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_payload *payload = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_PAYLOAD_DREG, htonl(payload->dreg)))
+		return -1;
+	if (nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(payload->base)))
+		return -1;
+	if (nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(payload->offset)))
+		return -1;
+	if (nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(payload->len)))
+		return -1;
+
+	return 0;
+}
+
+/* Forward declaration: the ops below and the type reference each other. */
+static struct nft_expr_type nft_payload_type;
+
+/* Generic payload operations, chosen by nft_payload_select_ops(). */
+static const struct nft_expr_ops nft_payload_ops = {
+	.type		= &nft_payload_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_payload)),
+	.init		= nft_payload_init,
+	.eval		= nft_payload_eval,
+	.dump		= nft_payload_dump,
+};
+
+/*
+ * Operations returned by nft_payload_select_ops() for short, aligned loads
+ * that are not relative to the link layer header. The callbacks are the
+ * same as in nft_payload_ops; the symbol is not static, so it is visible
+ * outside this file.
+ */
+const struct nft_expr_ops nft_payload_fast_ops = {
+	.type		= &nft_payload_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_payload)),
+	.init		= nft_payload_init,
+	.eval		= nft_payload_eval,
+	.dump		= nft_payload_dump,
+};
+
+/*
+ * Pick the operations for a new payload expression.
+ *
+ * All four attributes (dreg, base, offset, len) are mandatory, the base
+ * has to be a known header and len has to be between 1 and the size of
+ * the register data area.
+ *
+ * nft_payload_fast_ops is returned for loads of 1, 2 or 4 bytes whose
+ * offset is a multiple of the length and whose base is not the link layer
+ * header; nft_payload_ops is returned for everything else.
+ *
+ * Returns the selected ops, ERR_PTR(-EINVAL) for a missing attribute or an
+ * invalid length, or ERR_PTR(-EOPNOTSUPP) for an unknown base.
+ */
+static const struct nft_expr_ops *
+nft_payload_select_ops(const struct nft_ctx *ctx,
+		       const struct nlattr * const tb[])
+{
+	enum nft_payload_bases base;
+	unsigned int offset, len;
+
+	if (tb[NFTA_PAYLOAD_DREG] == NULL ||
+	    tb[NFTA_PAYLOAD_BASE] == NULL ||
+	    tb[NFTA_PAYLOAD_OFFSET] == NULL ||
+	    tb[NFTA_PAYLOAD_LEN] == NULL)
+		return ERR_PTR(-EINVAL);
+
+	base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+	switch (base) {
+	case NFT_PAYLOAD_LL_HEADER:
+	case NFT_PAYLOAD_NETWORK_HEADER:
+	case NFT_PAYLOAD_TRANSPORT_HEADER:
+		break;
+	default:
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+	len    = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+	if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data))
+		return ERR_PTR(-EINVAL);
+
+	/*
+	 * IS_ALIGNED() masks with (len - 1) and is therefore only meaningful
+	 * for a power-of-two length. A 3 byte load would otherwise see e.g.
+	 * offset 1 as aligned. len is known to be non-zero at this point, so
+	 * (len & (len - 1)) == 0 is a plain power-of-two test.
+	 */
+	if (len <= 4 && (len & (len - 1)) == 0 && IS_ALIGNED(offset, len) &&
+	    base != NFT_PAYLOAD_LL_HEADER)
+		return &nft_payload_fast_ops;
+
+	return &nft_payload_ops;
+}
+
+/*
+ * Expression type registered under the name "payload". It has no fixed
+ * ops; they are chosen per expression by nft_payload_select_ops().
+ */
+static struct nft_expr_type nft_payload_type __read_mostly = {
+	.owner		= THIS_MODULE,
+	.name		= "payload",
+	.policy		= nft_payload_policy,
+	.maxattr	= NFTA_PAYLOAD_MAX,
+	.select_ops	= nft_payload_select_ops,
+};
+
+/*
+ * Registers the "payload" expression type and returns whatever
+ * nft_register_expr() reports. Not static and not wired up through
+ * module_init() in this file.
+ */
+int __init nft_payload_module_init(void)
+{
+ return nft_register_expr(&nft_payload_type);
+}
+
+/* Unregisters the "payload" expression type again. */
+void nft_payload_module_exit(void)
+{
+ nft_unregister_expr(&nft_payload_type);
+}
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
new file mode 100644
index 000000000000..ca0c1b231bfe
--- /dev/null
+++ b/net/netfilter/nft_rbtree.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/* Private data of an rbtree backed set. */
+struct nft_rbtree {
+ struct rb_root root; /* root of the tree of struct nft_rbtree_elem */
+};
+
+/* One element of an rbtree backed set. */
+struct nft_rbtree_elem {
+ struct rb_node node; /* linkage into nft_rbtree.root */
+ u16 flags; /* copied from the inserted element, e.g. NFT_SET_ELEM_INTERVAL_END */
+ struct nft_data key; /* key the tree is ordered by */
+ struct nft_data data[]; /* one entry, only allocated for NFT_SET_MAP sets */
+};
+
+/*
+ * Look up @key in the set.
+ *
+ * The tree is searched for an exact match. While descending, the last
+ * element for which nft_data_cmp(elem key, @key) was negative is
+ * remembered; for NFT_SET_INTERVAL sets it is used as the result when no
+ * exact match exists. An element flagged NFT_SET_ELEM_INTERVAL_END never
+ * matches.
+ *
+ * For NFT_SET_MAP sets the data of the matching element is copied to
+ * @data. Returns true on a match, false otherwise.
+ */
+static bool nft_rbtree_lookup(const struct nft_set *set,
+			      const struct nft_data *key,
+			      struct nft_data *data)
+{
+	const struct nft_rbtree *tree = nft_set_priv(set);
+	const struct rb_node *node = tree->root.rb_node;
+	const struct nft_rbtree_elem *cur, *match = NULL, *candidate = NULL;
+	int cmp;
+
+	while (node != NULL) {
+		cur = rb_entry(node, struct nft_rbtree_elem, node);
+
+		cmp = nft_data_cmp(&cur->key, key, set->klen);
+		if (cmp == 0) {
+			match = cur;
+			break;
+		}
+		if (cmp < 0) {
+			candidate = cur;
+			node = node->rb_left;
+		} else {
+			node = node->rb_right;
+		}
+	}
+
+	if (match == NULL) {
+		/* No exact hit: only interval sets fall back to the candidate. */
+		if (!(set->flags & NFT_SET_INTERVAL) || candidate == NULL)
+			return false;
+		match = candidate;
+	}
+
+	if (match->flags & NFT_SET_ELEM_INTERVAL_END)
+		return false;
+	if (set->flags & NFT_SET_MAP)
+		nft_data_copy(data, match->data);
+	return true;
+}
+
+/*
+ * Release an element: uninit its key, uninit its data when the set is a
+ * map, then free the element itself. Unlinking it from the tree is left
+ * to the caller.
+ */
+static void nft_rbtree_elem_destroy(const struct nft_set *set,
+				    struct nft_rbtree_elem *rbe)
+{
+	const bool has_data = set->flags & NFT_SET_MAP;
+
+	nft_data_uninit(&rbe->key, NFT_DATA_VALUE);
+	if (has_data)
+		nft_data_uninit(rbe->data, set->dtype);
+
+	kfree(rbe);
+}
+
+/*
+ * Link @new into the tree of @set.
+ *
+ * The descent uses the same ordering as the lookup functions: left when
+ * nft_data_cmp(existing key, new key) is negative, right when positive.
+ *
+ * Returns 0 on success or -EEXIST if an element with an equal key is
+ * already present; @new is left untouched in that case.
+ */
+static int __nft_rbtree_insert(const struct nft_set *set,
+			       struct nft_rbtree_elem *new)
+{
+	struct nft_rbtree *tree = nft_set_priv(set);
+	struct rb_node **link = &tree->root.rb_node;
+	struct rb_node *parent = NULL;
+	struct nft_rbtree_elem *cur;
+	int cmp;
+
+	while (*link != NULL) {
+		parent = *link;
+		cur = rb_entry(parent, struct nft_rbtree_elem, node);
+
+		cmp = nft_data_cmp(&cur->key, &new->key, set->klen);
+		if (cmp == 0)
+			return -EEXIST;
+
+		link = cmp < 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	rb_link_node(&new->node, parent, link);
+	rb_insert_color(&new->node, &tree->root);
+	return 0;
+}
+
+/*
+ * Allocate a tree element for @elem and insert it into @set.
+ *
+ * Room for one data entry is only allocated for NFT_SET_MAP sets. The
+ * flags, the key and (for maps) the data are copied from @elem.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the error of
+ * __nft_rbtree_insert(), in which case the new element is freed again.
+ */
+static int nft_rbtree_insert(const struct nft_set *set,
+			     const struct nft_set_elem *elem)
+{
+	const bool has_data = set->flags & NFT_SET_MAP;
+	struct nft_rbtree_elem *node;
+	unsigned int len = sizeof(*node);
+	int ret;
+
+	if (has_data)
+		len += sizeof(node->data[0]);
+
+	node = kzalloc(len, GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	nft_data_copy(&node->key, &elem->key);
+	if (has_data)
+		nft_data_copy(node->data, &elem->data);
+	node->flags = elem->flags;
+
+	ret = __nft_rbtree_insert(set, node);
+	if (ret < 0)
+		kfree(node);
+
+	return ret;
+}
+
+/*
+ * Remove the element referenced by elem->cookie (as filled in by
+ * nft_rbtree_get()) from the tree and free it. Key and data are not
+ * uninitialized here.
+ */
+static void nft_rbtree_remove(const struct nft_set *set,
+			      const struct nft_set_elem *elem)
+{
+	struct nft_rbtree *tree = nft_set_priv(set);
+	struct nft_rbtree_elem *victim = elem->cookie;
+
+	rb_erase(&victim->node, &tree->root);
+	kfree(victim);
+}
+
+/*
+ * Find the element whose key equals elem->key exactly.
+ *
+ * On success elem->cookie points at the tree element, elem->flags holds
+ * its flags and, for NFT_SET_MAP sets, elem->data holds a copy of its
+ * data.
+ *
+ * Returns 0 if the element was found, -ENOENT otherwise.
+ */
+static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
+{
+	const struct nft_rbtree *tree = nft_set_priv(set);
+	const struct rb_node *node = tree->root.rb_node;
+	struct nft_rbtree_elem *cur;
+	int cmp;
+
+	while (node != NULL) {
+		cur = rb_entry(node, struct nft_rbtree_elem, node);
+
+		cmp = nft_data_cmp(&cur->key, &elem->key, set->klen);
+		if (cmp < 0) {
+			node = node->rb_left;
+			continue;
+		}
+		if (cmp > 0) {
+			node = node->rb_right;
+			continue;
+		}
+
+		elem->cookie = cur;
+		if (set->flags & NFT_SET_MAP)
+			nft_data_copy(&elem->data, cur->data);
+		elem->flags = cur->flags;
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Call iter->fn for the elements of @set in tree order.
+ *
+ * The first iter->skip elements are only counted. For every other element
+ * a copy of the key, the flags and (for NFT_SET_MAP sets) the data is
+ * passed to the callback. The walk stops as soon as the callback returns
+ * a negative value, which is left in iter->err; iter->count is not
+ * advanced for that element.
+ */
+static void nft_rbtree_walk(const struct nft_ctx *ctx,
+			    const struct nft_set *set,
+			    struct nft_set_iter *iter)
+{
+	const struct nft_rbtree *tree = nft_set_priv(set);
+	const struct nft_rbtree_elem *cur;
+	struct nft_set_elem elem;
+	struct rb_node *node;
+
+	for (node = rb_first(&tree->root); node != NULL;
+	     node = rb_next(node), iter->count++) {
+		if (iter->count < iter->skip)
+			continue;
+
+		cur = rb_entry(node, struct nft_rbtree_elem, node);
+		nft_data_copy(&elem.key, &cur->key);
+		if (set->flags & NFT_SET_MAP)
+			nft_data_copy(&elem.data, cur->data);
+		elem.flags = cur->flags;
+
+		iter->err = iter->fn(ctx, set, iter, &elem);
+		if (iter->err < 0)
+			return;
+	}
+}
+
+/* Size of the private set data; independent of the attributes in @nla. */
+static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
+{
+ return sizeof(struct nft_rbtree);
+}
+
+/* Set up an empty tree for a new set; @nla is not used. Always returns 0. */
+static int nft_rbtree_init(const struct nft_set *set,
+ const struct nlattr * const nla[])
+{
+ struct nft_rbtree *priv = nft_set_priv(set);
+
+ priv->root = RB_ROOT;
+ return 0;
+}
+
+/*
+ * Tear down the set: repeatedly unlink the current root node and release
+ * its element until the tree is empty.
+ */
+static void nft_rbtree_destroy(const struct nft_set *set)
+{
+	struct nft_rbtree *tree = nft_set_priv(set);
+	struct rb_node *node = tree->root.rb_node;
+
+	while (node != NULL) {
+		rb_erase(node, &tree->root);
+		nft_rbtree_elem_destroy(set, rb_entry(node,
+						      struct nft_rbtree_elem,
+						      node));
+		node = tree->root.rb_node;
+	}
+}
+
+/* Set backend operations; supports interval sets and maps. */
+static struct nft_set_ops nft_rbtree_ops __read_mostly = {
+	.owner		= THIS_MODULE,
+	.features	= NFT_SET_INTERVAL | NFT_SET_MAP,
+	.privsize	= nft_rbtree_privsize,
+	.init		= nft_rbtree_init,
+	.destroy	= nft_rbtree_destroy,
+	.insert		= nft_rbtree_insert,
+	.remove		= nft_rbtree_remove,
+	.get		= nft_rbtree_get,
+	.lookup		= nft_rbtree_lookup,
+	.walk		= nft_rbtree_walk,
+};
+
+/*
+ * Module init hook: registers the rbtree set backend and returns whatever
+ * nft_register_set() reports.
+ */
+static int __init nft_rbtree_module_init(void)
+{
+ return nft_register_set(&nft_rbtree_ops);
+}
+
+/* Module exit hook: unregisters the rbtree set backend again. */
+static void __exit nft_rbtree_module_exit(void)
+{
+ nft_unregister_set(&nft_rbtree_ops);
+}
+
+module_init(nft_rbtree_module_init);
+module_exit(nft_rbtree_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_SET();