Diffstat (limited to 'net/netfilter')
-rw-r--r--  net/netfilter/Kconfig | 28
-rw-r--r--  net/netfilter/Makefile | 9
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_gen.h | 19
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_ip.c | 14
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_ipmac.c | 66
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_port.c | 18
-rw-r--r--  net/netfilter/ipset/ip_set_core.c | 158
-rw-r--r--  net/netfilter/ipset/ip_set_hash_gen.h | 28
-rw-r--r--  net/netfilter/ipset/ip_set_hash_mac.c | 3
-rw-r--r--  net/netfilter/ipset/ip_set_hash_netiface.c | 4
-rw-r--r--  net/netfilter/ipset/ip_set_list_set.c | 62
-rw-r--r--  net/netfilter/ipvs/ip_vs_app.c | 8
-rw-r--r--  net/netfilter/ipvs/ip_vs_core.c | 54
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c | 17
-rw-r--r--  net/netfilter/ipvs/ip_vs_pe_sip.c | 6
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_sctp.c | 2
-rw-r--r--  net/netfilter/ipvs/ip_vs_xmit.c | 12
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 41
-rw-r--r--  net/netfilter/nf_conntrack_expect.c | 7
-rw-r--r--  net/netfilter/nf_conntrack_ftp.c | 17
-rw-r--r--  net/netfilter/nf_conntrack_helper.c | 2
-rw-r--r--  net/netfilter/nf_conntrack_irc.c | 7
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c | 98
-rw-r--r--  net/netfilter/nf_conntrack_sane.c | 19
-rw-r--r--  net/netfilter/nf_conntrack_sip.c | 5
-rw-r--r--  net/netfilter/nf_conntrack_standalone.c | 7
-rw-r--r--  net/netfilter/nf_conntrack_tftp.c | 7
-rw-r--r--  net/netfilter/nf_conntrack_timeout.c | 2
-rw-r--r--  net/netfilter/nf_dup_netdev.c | 39
-rw-r--r--  net/netfilter/nf_nat_redirect.c | 2
-rw-r--r--  net/netfilter/nf_tables_api.c | 263
-rw-r--r--  net/netfilter/nf_tables_core.c | 62
-rw-r--r--  net/netfilter/nf_tables_inet.c | 2
-rw-r--r--  net/netfilter/nf_tables_netdev.c | 57
-rw-r--r--  net/netfilter/nf_tables_trace.c | 275
-rw-r--r--  net/netfilter/nfnetlink.c | 55
-rw-r--r--  net/netfilter/nfnetlink_acct.c | 24
-rw-r--r--  net/netfilter/nfnetlink_cthelper.c | 18
-rw-r--r--  net/netfilter/nfnetlink_cttimeout.c | 122
-rw-r--r--  net/netfilter/nfnetlink_log.c | 47
-rw-r--r--  net/netfilter/nfnetlink_queue.c | 145
-rw-r--r--  net/netfilter/nft_byteorder.c | 21
-rw-r--r--  net/netfilter/nft_compat.c | 12
-rw-r--r--  net/netfilter/nft_counter.c | 51
-rw-r--r--  net/netfilter/nft_ct.c | 39
-rw-r--r--  net/netfilter/nft_dup_netdev.c | 97
-rw-r--r--  net/netfilter/nft_dynset.c | 5
-rw-r--r--  net/netfilter/nft_fwd_netdev.c | 98
-rw-r--r--  net/netfilter/nft_limit.c | 16
-rw-r--r--  net/netfilter/nft_masq.c | 51
-rw-r--r--  net/netfilter/nft_meta.c | 101
-rw-r--r--  net/netfilter/nft_payload.c | 135
-rw-r--r--  net/netfilter/x_tables.c | 80
-rw-r--r--  net/netfilter/xt_CT.c | 2
-rw-r--r--  net/netfilter/xt_TCPMSS.c | 9
-rw-r--r--  net/netfilter/xt_TEE.c | 10
-rw-r--r--  net/netfilter/xt_TPROXY.c | 31
-rw-r--r--  net/netfilter/xt_cgroup.c | 108
-rw-r--r--  net/netfilter/xt_osf.c | 9
-rw-r--r--  net/netfilter/xt_owner.c | 6
-rw-r--r--  net/netfilter/xt_socket.c | 28
61 files changed, 1924 insertions, 816 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e22349ea7256..95e757c377f9 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -563,6 +563,28 @@ config NFT_COMPAT
x_tables match/target extensions over the nf_tables
framework.
+if NF_TABLES_NETDEV
+
+config NF_DUP_NETDEV
+ tristate "Netfilter packet duplication support"
+ help
+ This option enables the generic packet duplication infrastructure
+ for Netfilter.
+
+config NFT_DUP_NETDEV
+ tristate "Netfilter nf_tables netdev packet duplication support"
+ select NF_DUP_NETDEV
+ help
+ This option enables packet duplication for the "netdev" family.
+
+config NFT_FWD_NETDEV
+ tristate "Netfilter nf_tables netdev packet forwarding support"
+ select NF_DUP_NETDEV
+ help
+ This option enables packet forwarding for the "netdev" family.
+
+endif # NF_TABLES_NETDEV
+
endif # NF_TABLES
config NETFILTER_XTABLES
@@ -869,7 +891,7 @@ config NETFILTER_XT_TARGET_TEE
depends on IPV6 || IPV6=n
depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DUP_IPV4
- select NF_DUP_IPV6 if IP6_NF_IPTABLES
+ select NF_DUP_IPV6 if IPV6
---help---
This option adds a "TEE" target with which a packet can be cloned and
this clone be rerouted to another nexthop.
@@ -882,7 +904,7 @@ config NETFILTER_XT_TARGET_TPROXY
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4
- select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
+ select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
help
This option adds a `TPROXY' target, which is somewhat similar to
REDIRECT. It can only be used in the mangle table and is useful
@@ -1375,7 +1397,7 @@ config NETFILTER_XT_MATCH_SOCKET
depends on IPV6 || IPV6=n
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
select NF_DEFRAG_IPV4
- select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
+ select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
help
This option adds a `socket' match, which can be used to match
packets for which a TCP or UDP socket lookup finds a valid socket.
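
The three new netdev-family options above are thin: NF_DUP_NETDEV provides the generic clone-and-transmit helper, and NFT_DUP_NETDEV/NFT_FWD_NETDEV are nf_tables expressions built on top of it. As a rough sketch of how such an expression evaluates (the struct and function names here are illustrative, not the exact ones from the new nft_dup_netdev.c, and this assumes the nf_dup_netdev_egress() helper added by this series):

    #include <net/netfilter/nf_tables.h>
    #include <net/netfilter/nf_dup_netdev.h>

    struct dup_netdev_sketch {
        enum nft_registers sreg_dev:8;  /* register holding the output ifindex */
    };

    static void dup_netdev_sketch_eval(const struct nft_expr *expr,
                                       struct nft_regs *regs,
                                       const struct nft_pktinfo *pkt)
    {
        struct dup_netdev_sketch *priv = nft_expr_priv(expr);
        int oif = regs->data[priv->sreg_dev];

        /* clone the skb and transmit the copy out of interface 'oif' */
        nf_dup_netdev_egress(pkt, oif);
    }
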
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 7638c36b498c..69134541d65b 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -66,8 +66,11 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
# SYNPROXY
obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
+# generic packet duplication from netdev family
+obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o
+
# nf_tables
-nf_tables-objs += nf_tables_core.o nf_tables_api.o
+nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o
nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o
nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
@@ -90,6 +93,10 @@ obj-$(CONFIG_NFT_LOG) += nft_log.o
obj-$(CONFIG_NFT_MASQ) += nft_masq.o
obj-$(CONFIG_NFT_REDIR) += nft_redir.o
+# nf_tables netdev
+obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
+obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
+
# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index d05e759ed0fa..2e8e7e5fb4a6 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -33,7 +33,7 @@
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype MTYPE
-#define get_ext(set, map, id) ((map)->extensions + (set)->dsize * (id))
+#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
static void
mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
@@ -67,12 +67,9 @@ mtype_destroy(struct ip_set *set)
del_timer_sync(&map->gc);
ip_set_free(map->members);
- if (set->dsize) {
- if (set->extensions & IPSET_EXT_DESTROY)
- mtype_ext_cleanup(set);
- ip_set_free(map->extensions);
- }
- kfree(map);
+ if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
+ mtype_ext_cleanup(set);
+ ip_set_free(map);
set->data = NULL;
}
@@ -92,16 +89,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
{
const struct mtype *map = set->data;
struct nlattr *nested;
+ size_t memsize = sizeof(*map) + map->memsize;
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested)
goto nla_put_failure;
if (mtype_do_head(skb, map) ||
- nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
- nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
- htonl(sizeof(*map) +
- map->memsize +
- set->dsize * map->elements)))
+ nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
+ nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
goto nla_put_failure;
if (unlikely(ip_set_put_flags(skb, set)))
goto nla_put_failure;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 64a564334418..4783efff0bde 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -41,7 +41,6 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
void *members; /* the set members */
- void *extensions; /* data extensions */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -49,6 +48,8 @@ struct bitmap_ip {
size_t memsize; /* members size */
u8 netmask; /* subnet netmask */
struct timer_list gc; /* garbage collection */
+ unsigned char extensions[0] /* data extensions */
+ __aligned(__alignof__(u64));
};
/* ADT structure for generic function args */
@@ -224,13 +225,6 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
map->members = ip_set_alloc(map->memsize);
if (!map->members)
return false;
- if (set->dsize) {
- map->extensions = ip_set_alloc(set->dsize * elements);
- if (!map->extensions) {
- kfree(map->members);
- return false;
- }
- }
map->first_ip = first_ip;
map->last_ip = last_ip;
map->elements = elements;
@@ -316,13 +310,13 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
pr_debug("hosts %u, elements %llu\n",
hosts, (unsigned long long)elements);
- map = kzalloc(sizeof(*map), GFP_KERNEL);
+ set->dsize = ip_set_elem_len(set, tb, 0, 0);
+ map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
if (!map)
return -ENOMEM;
map->memsize = bitmap_bytes(0, elements - 1);
set->variant = &bitmap_ip;
- set->dsize = ip_set_elem_len(set, tb, 0);
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
kfree(map);
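
The bitmap:ip rework above (and the matching ipmac/port changes below) all move to the same layout: the separately allocated extensions pointer becomes a flexible array member at the tail of the map structure, so a single ip_set_alloc() covers the bookkeeping struct plus every per-element extension slot, and get_ext() simply indexes into that tail. A condensed sketch of the pattern (struct and variable names are illustrative):

    struct bitmap_sketch {
        void *members;                  /* member bit vector */
        u32 elements;                   /* maximum number of elements */
        size_t memsize;                 /* size of the bit vector */
        unsigned char extensions[0]     /* per-element extension data */
            __aligned(__alignof__(u64));
    };

    /* element size first, then one allocation for struct + extension area */
    set->dsize = ip_set_elem_len(set, tb, 0, 0);
    map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
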
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 1430535118fb..9a065f672d3a 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -47,24 +47,26 @@ enum {
/* Type structure */
struct bitmap_ipmac {
void *members; /* the set members */
- void *extensions; /* MAC + data extensions */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
size_t memsize; /* members size */
struct timer_list gc; /* garbage collector */
+ unsigned char extensions[0] /* MAC + data extensions */
+ __aligned(__alignof__(u64));
};
/* ADT structure for generic function args */
struct bitmap_ipmac_adt_elem {
+ unsigned char ether[ETH_ALEN] __aligned(2);
u16 id;
- unsigned char *ether;
+ u16 add_mac;
};
struct bitmap_ipmac_elem {
unsigned char ether[ETH_ALEN];
unsigned char filled;
-} __attribute__ ((aligned));
+} __aligned(__alignof__(u64));
static inline u32
ip_to_id(const struct bitmap_ipmac *m, u32 ip)
@@ -72,11 +74,11 @@ ip_to_id(const struct bitmap_ipmac *m, u32 ip)
return ip - m->first_ip;
}
-static inline struct bitmap_ipmac_elem *
-get_elem(void *extensions, u16 id, size_t dsize)
-{
- return (struct bitmap_ipmac_elem *)(extensions + id * dsize);
-}
+#define get_elem(extensions, id, dsize) \
+ (struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
+
+#define get_const_elem(extensions, id, dsize) \
+ (const struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
/* Common functions */
@@ -88,10 +90,9 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
if (!test_bit(e->id, map->members))
return 0;
- elem = get_elem(map->extensions, e->id, dsize);
- if (elem->filled == MAC_FILLED)
- return !e->ether ||
- ether_addr_equal(e->ether, elem->ether);
+ elem = get_const_elem(map->extensions, e->id, dsize);
+ if (e->add_mac && elem->filled == MAC_FILLED)
+ return ether_addr_equal(e->ether, elem->ether);
/* Trigger kernel to fill out the ethernet address */
return -EAGAIN;
}
@@ -103,7 +104,7 @@ bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
if (!test_bit(id, map->members))
return 0;
- elem = get_elem(map->extensions, id, dsize);
+ elem = get_const_elem(map->extensions, id, dsize);
/* Timer not started for the incomplete elements */
return elem->filled == MAC_FILLED;
}
@@ -133,7 +134,7 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
* and we can reuse it later when MAC is filled out,
* possibly by the kernel
*/
- if (e->ether)
+ if (e->add_mac)
ip_set_timeout_set(timeout, t);
else
*timeout = t;
@@ -150,7 +151,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
elem = get_elem(map->extensions, e->id, dsize);
if (test_bit(e->id, map->members)) {
if (elem->filled == MAC_FILLED) {
- if (e->ether &&
+ if (e->add_mac &&
(flags & IPSET_FLAG_EXIST) &&
!ether_addr_equal(e->ether, elem->ether)) {
/* memcpy isn't atomic */
@@ -159,7 +160,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
ether_addr_copy(elem->ether, e->ether);
}
return IPSET_ADD_FAILED;
- } else if (!e->ether)
+ } else if (!e->add_mac)
/* Already added without ethernet address */
return IPSET_ADD_FAILED;
/* Fill the MAC address and trigger the timer activation */
@@ -168,7 +169,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
ether_addr_copy(elem->ether, e->ether);
elem->filled = MAC_FILLED;
return IPSET_ADD_START_STORED_TIMEOUT;
- } else if (e->ether) {
+ } else if (e->add_mac) {
/* We can store MAC too */
ether_addr_copy(elem->ether, e->ether);
elem->filled = MAC_FILLED;
@@ -191,7 +192,7 @@ bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
u32 id, size_t dsize)
{
const struct bitmap_ipmac_elem *elem =
- get_elem(map->extensions, id, dsize);
+ get_const_elem(map->extensions, id, dsize);
return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
htonl(map->first_ip + id)) ||
@@ -213,7 +214,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
{
struct bitmap_ipmac *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct bitmap_ipmac_adt_elem e = { .id = 0 };
+ struct bitmap_ipmac_adt_elem e = { .id = 0, .add_mac = 1 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
u32 ip;
@@ -231,7 +232,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
return -EINVAL;
e.id = ip_to_id(map, ip);
- e.ether = eth_hdr(skb)->h_source;
+ memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
@@ -265,11 +266,12 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
return -IPSET_ERR_BITMAP_RANGE;
e.id = ip_to_id(map, ip);
- if (tb[IPSET_ATTR_ETHER])
- e.ether = nla_data(tb[IPSET_ATTR_ETHER]);
- else
- e.ether = NULL;
-
+ if (tb[IPSET_ATTR_ETHER]) {
+ if (nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN)
+ return -IPSET_ERR_PROTOCOL;
+ memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+ e.add_mac = 1;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
return ip_set_eexist(ret, flags) ? 0 : ret;
@@ -300,13 +302,6 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
map->members = ip_set_alloc(map->memsize);
if (!map->members)
return false;
- if (set->dsize) {
- map->extensions = ip_set_alloc(set->dsize * elements);
- if (!map->extensions) {
- kfree(map->members);
- return false;
- }
- }
map->first_ip = first_ip;
map->last_ip = last_ip;
map->elements = elements;
@@ -361,14 +356,15 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (elements > IPSET_BITMAP_MAX_RANGE + 1)
return -IPSET_ERR_BITMAP_RANGE_SIZE;
- map = kzalloc(sizeof(*map), GFP_KERNEL);
+ set->dsize = ip_set_elem_len(set, tb,
+ sizeof(struct bitmap_ipmac_elem),
+ __alignof__(struct bitmap_ipmac_elem));
+ map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
if (!map)
return -ENOMEM;
map->memsize = bitmap_bytes(0, elements - 1);
set->variant = &bitmap_ipmac;
- set->dsize = ip_set_elem_len(set, tb,
- sizeof(struct bitmap_ipmac_elem));
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 5338ccd5da46..7f0c733358a4 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -35,12 +35,13 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
void *members; /* the set members */
- void *extensions; /* data extensions */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
size_t memsize; /* members size */
struct timer_list gc; /* garbage collection */
+ unsigned char extensions[0] /* data extensions */
+ __aligned(__alignof__(u64));
};
/* ADT structure for generic function args */
@@ -209,13 +210,6 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
map->members = ip_set_alloc(map->memsize);
if (!map->members)
return false;
- if (set->dsize) {
- map->extensions = ip_set_alloc(set->dsize * map->elements);
- if (!map->extensions) {
- kfree(map->members);
- return false;
- }
- }
map->first_port = first_port;
map->last_port = last_port;
set->timeout = IPSET_NO_TIMEOUT;
@@ -232,6 +226,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
{
struct bitmap_port *map;
u16 first_port, last_port;
+ u32 elements;
if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -248,14 +243,15 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
last_port = tmp;
}
- map = kzalloc(sizeof(*map), GFP_KERNEL);
+ elements = last_port - first_port + 1;
+ set->dsize = ip_set_elem_len(set, tb, 0, 0);
+ map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
if (!map)
return -ENOMEM;
- map->elements = last_port - first_port + 1;
+ map->elements = elements;
map->memsize = bitmap_bytes(0, map->elements);
set->variant = &bitmap_port;
- set->dsize = ip_set_elem_len(set, tb, 0);
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 69ab9c2634e1..a748b0c2c981 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -364,25 +364,27 @@ add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
}
size_t
-ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
+ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
+ size_t align)
{
enum ip_set_ext_id id;
- size_t offset = len;
u32 cadt_flags = 0;
if (tb[IPSET_ATTR_CADT_FLAGS])
cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
set->flags |= IPSET_CREATE_FLAG_FORCEADD;
+ if (!align)
+ align = 1;
for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
if (!add_extension(id, cadt_flags, tb))
continue;
- offset = ALIGN(offset, ip_set_extensions[id].align);
- set->offset[id] = offset;
+ len = ALIGN(len, ip_set_extensions[id].align);
+ set->offset[id] = len;
set->extensions |= ip_set_extensions[id].type;
- offset += ip_set_extensions[id].len;
+ len += ip_set_extensions[id].len;
}
- return offset;
+ return ALIGN(len, align);
}
EXPORT_SYMBOL_GPL(ip_set_elem_len);
@@ -495,6 +497,26 @@ __ip_set_put(struct ip_set *set)
write_unlock_bh(&ip_set_ref_lock);
}
+/* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
+ * a separate reference counter
+ */
+static inline void
+__ip_set_get_netlink(struct ip_set *set)
+{
+ write_lock_bh(&ip_set_ref_lock);
+ set->ref_netlink++;
+ write_unlock_bh(&ip_set_ref_lock);
+}
+
+static inline void
+__ip_set_put_netlink(struct ip_set *set)
+{
+ write_lock_bh(&ip_set_ref_lock);
+ BUG_ON(set->ref_netlink == 0);
+ set->ref_netlink--;
+ write_unlock_bh(&ip_set_ref_lock);
+}
+
/* Add, del and test set entries from kernel.
*
* The set behind the index must exist and must be referenced
@@ -823,20 +845,17 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
return 0;
}
-static int
-ip_set_none(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_none(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
return -EOPNOTSUPP;
}
-static int
-ip_set_create(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_create(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct net *net = sock_net(ctnl);
struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set, *clash = NULL;
ip_set_id_t index = IPSET_INVALID_ID;
@@ -974,12 +993,11 @@ ip_set_destroy_set(struct ip_set *set)
kfree(set);
}
-static int
-ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_destroy(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+ struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *s;
ip_set_id_t i;
int ret = 0;
@@ -987,6 +1005,9 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
if (unlikely(protocol_failed(attr)))
return -IPSET_ERR_PROTOCOL;
+ /* Must wait for flush to be really finished in list:set */
+ rcu_barrier();
+
/* Commands are serialized and references are
* protected by the ip_set_ref_lock.
* External systems (i.e. xt_set) must call
@@ -1001,7 +1022,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
if (!attr[IPSET_ATTR_SETNAME]) {
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
- if (s && s->ref) {
+ if (s && (s->ref || s->ref_netlink)) {
ret = -IPSET_ERR_BUSY;
goto out;
}
@@ -1023,7 +1044,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
if (!s) {
ret = -ENOENT;
goto out;
- } else if (s->ref) {
+ } else if (s->ref || s->ref_netlink) {
ret = -IPSET_ERR_BUSY;
goto out;
}
@@ -1050,12 +1071,11 @@ ip_set_flush_set(struct ip_set *set)
spin_unlock_bh(&set->lock);
}
-static int
-ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+ struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *s;
ip_set_id_t i;
@@ -1090,12 +1110,11 @@ ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = {
.len = IPSET_MAXNAMELEN - 1 },
};
-static int
-ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_rename(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+ struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set, *s;
const char *name2;
ip_set_id_t i;
@@ -1140,12 +1159,11 @@ out:
* so the ip_set_list always contains valid pointers to the sets.
*/
-static int
-ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+ struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *from, *to;
ip_set_id_t from_id, to_id;
char from_name[IPSET_MAXNAMELEN];
@@ -1173,6 +1191,9 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
from->family == to->family))
return -IPSET_ERR_TYPE_MISMATCH;
+ if (from->ref_netlink || to->ref_netlink)
+ return -EBUSY;
+
strncpy(from_name, from->name, IPSET_MAXNAMELEN);
strncpy(from->name, to->name, IPSET_MAXNAMELEN);
strncpy(to->name, from_name, IPSET_MAXNAMELEN);
@@ -1208,7 +1229,7 @@ ip_set_dump_done(struct netlink_callback *cb)
if (set->variant->uref)
set->variant->uref(set, cb, false);
pr_debug("release set %s\n", set->name);
- __ip_set_put_byindex(inst, index);
+ __ip_set_put_netlink(set);
}
return 0;
}
@@ -1330,7 +1351,7 @@ dump_last:
if (!cb->args[IPSET_CB_ARG0]) {
/* Start listing: make sure set won't be destroyed */
pr_debug("reference set\n");
- set->ref++;
+ set->ref_netlink++;
}
write_unlock_bh(&ip_set_ref_lock);
nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
@@ -1398,7 +1419,7 @@ release_refcount:
if (set->variant->uref)
set->variant->uref(set, cb, false);
pr_debug("release set %s\n", set->name);
- __ip_set_put_byindex(inst, index);
+ __ip_set_put_netlink(set);
cb->args[IPSET_CB_ARG0] = 0;
}
out:
@@ -1411,10 +1432,9 @@ out:
return ret < 0 ? ret : skb->len;
}
-static int
-ip_set_dump(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
if (unlikely(protocol_failed(attr)))
return -IPSET_ERR_PROTOCOL;
@@ -1498,12 +1518,11 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
return ret;
}
-static int
-ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+ struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set;
struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
const struct nlattr *nla;
@@ -1553,12 +1572,11 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
return ret;
}
-static int
-ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+ struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set;
struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
const struct nlattr *nla;
@@ -1608,12 +1626,11 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
return ret;
}
-static int
-ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+ struct ip_set_net *inst = ip_set_pernet(net);
struct ip_set *set;
struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
int ret = 0;
@@ -1644,12 +1661,11 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
/* Get headed data of a set */
-static int
-ip_set_header(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_header(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
- struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
+ struct ip_set_net *inst = ip_set_pernet(net);
const struct ip_set *set;
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
@@ -1701,10 +1717,9 @@ static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = {
[IPSET_ATTR_FAMILY] = { .type = NLA_U8 },
};
-static int
-ip_set_type(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
@@ -1760,10 +1775,9 @@ ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = {
[IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
};
-static int
-ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const attr[])
+static int ip_set_protocol(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
{
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
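
Note the reworked ip_set_elem_len() above: it now takes the required element alignment, still aligns each extension's offset individually, and rounds the final element size up to that alignment so the tail-packed extension arrays stay properly aligned. A toy walk-through with made-up sizes (a 6-byte base element and one 16-byte extension that needs u64 alignment):

    size_t len = 6;                     /* sizeof() of the base element */

    len = ALIGN(len, 8);                /* extension offset aligned: set->offset[id] = 8 */
    len += 16;                          /* extension data follows: len = 24 */

    set->dsize = ALIGN(len, align);     /* final size rounded to the element alignment */
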
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 691b54fcaf2a..d32fd6b036bf 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -72,8 +72,9 @@ struct hbucket {
DECLARE_BITMAP(used, AHASH_MAX_TUNED);
u8 size; /* size of the array */
u8 pos; /* position of the first free entry */
- unsigned char value[0]; /* the array of the values */
-} __attribute__ ((aligned));
+ unsigned char value[0] /* the array of the values */
+ __aligned(__alignof__(u64));
+};
/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
@@ -475,7 +476,7 @@ static void
mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
{
struct htable *t;
- struct hbucket *n;
+ struct hbucket *n, *tmp;
struct mtype_elem *data;
u32 i, j, d;
#ifdef IP_SET_HASH_WITH_NETS
@@ -510,9 +511,14 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
}
}
if (d >= AHASH_INIT_SIZE) {
- struct hbucket *tmp = kzalloc(sizeof(*tmp) +
- (n->size - AHASH_INIT_SIZE) * dsize,
- GFP_ATOMIC);
+ if (d >= n->size) {
+ rcu_assign_pointer(hbucket(t, i), NULL);
+ kfree_rcu(n, rcu);
+ continue;
+ }
+ tmp = kzalloc(sizeof(*tmp) +
+ (n->size - AHASH_INIT_SIZE) * dsize,
+ GFP_ATOMIC);
if (!tmp)
/* Still try to delete expired elements */
continue;
@@ -522,7 +528,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
continue;
data = ahash_data(n, j, dsize);
memcpy(tmp->value + d * dsize, data, dsize);
- set_bit(j, tmp->used);
+ set_bit(d, tmp->used);
d++;
}
tmp->pos = d;
@@ -1076,7 +1082,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask))
goto nla_put_failure;
#endif
- if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
+ if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
goto nla_put_failure;
if (unlikely(ip_set_put_flags(skb, set)))
@@ -1323,12 +1329,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
#endif
set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
set->dsize = ip_set_elem_len(set, tb,
- sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
+ sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
+ __alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
#ifndef IP_SET_PROTO_UNDEF
} else {
set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
set->dsize = ip_set_elem_len(set, tb,
- sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
+ sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
+ __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
}
#endif
if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index f1e7d2c0f685..8f004edad396 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -110,7 +110,8 @@ hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- if (unlikely(!tb[IPSET_ATTR_ETHER]))
+ if (unlikely(!tb[IPSET_ATTR_ETHER] ||
+ nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN))
return -IPSET_ERR_PROTOCOL;
ret = ip_set_get_extensions(set, tb, &ext);
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 43d8c9896fa3..f0f688db6213 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -164,8 +164,6 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- if (e.cidr == 0)
- return -EINVAL;
if (adt == IPSET_TEST)
e.cidr = HOST_MASK;
@@ -377,8 +375,6 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- if (e.cidr == 0)
- return -EINVAL;
if (adt == IPSET_TEST)
e.cidr = HOST_MASK;
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 5a30ce6e8c90..a2a89e4e0a14 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -30,8 +30,9 @@ MODULE_ALIAS("ip_set_list:set");
struct set_elem {
struct rcu_head rcu;
struct list_head list;
+ struct ip_set *set; /* Sigh, in order to cleanup reference */
ip_set_id_t id;
-};
+} __aligned(__alignof__(u64));
struct set_adt_elem {
ip_set_id_t id;
@@ -151,30 +152,29 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
/* Userspace interfaces: we are protected by the nfnl mutex */
static void
-__list_set_del(struct ip_set *set, struct set_elem *e)
+__list_set_del_rcu(struct rcu_head * rcu)
{
+ struct set_elem *e = container_of(rcu, struct set_elem, rcu);
+ struct ip_set *set = e->set;
struct list_set *map = set->data;
ip_set_put_byindex(map->net, e->id);
- /* We may call it, because we don't have a to be destroyed
- * extension which is used by the kernel.
- */
ip_set_ext_destroy(set, e);
- kfree_rcu(e, rcu);
+ kfree(e);
}
static inline void
list_set_del(struct ip_set *set, struct set_elem *e)
{
list_del_rcu(&e->list);
- __list_set_del(set, e);
+ call_rcu(&e->rcu, __list_set_del_rcu);
}
static inline void
-list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
+list_set_replace(struct set_elem *e, struct set_elem *old)
{
list_replace_rcu(&old->list, &e->list);
- __list_set_del(set, old);
+ call_rcu(&old->rcu, __list_set_del_rcu);
}
static void
@@ -244,9 +244,6 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct set_elem *e, *n, *prev, *next;
bool flag_exist = flags & IPSET_FLAG_EXIST;
- if (SET_WITH_TIMEOUT(set))
- set_cleanup_entries(set);
-
/* Find where to add the new entry */
n = prev = next = NULL;
list_for_each_entry(e, &map->members, list) {
@@ -301,10 +298,11 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (!e)
return -ENOMEM;
e->id = d->id;
+ e->set = set;
INIT_LIST_HEAD(&e->list);
list_set_init_extensions(set, ext, e);
if (n)
- list_set_replace(set, e, n);
+ list_set_replace(e, n);
else if (next)
list_add_tail_rcu(&e->list, &next->list);
else if (prev)
@@ -431,6 +429,7 @@ list_set_destroy(struct ip_set *set)
if (SET_WITH_TIMEOUT(set))
del_timer_sync(&map->gc);
+
list_for_each_entry_safe(e, n, &map->members, list) {
list_del(&e->list);
ip_set_put_byindex(map->net, e->id);
@@ -450,14 +449,16 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
struct set_elem *e;
u32 n = 0;
- list_for_each_entry(e, &map->members, list)
+ rcu_read_lock();
+ list_for_each_entry_rcu(e, &map->members, list)
n++;
+ rcu_read_unlock();
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested)
goto nla_put_failure;
if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
- nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
+ nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
htonl(sizeof(*map) + n * set->dsize)))
goto nla_put_failure;
@@ -483,33 +484,25 @@ list_set_list(const struct ip_set *set,
atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
if (!atd)
return -EMSGSIZE;
- list_for_each_entry(e, &map->members, list) {
- if (i == first)
- break;
- i++;
- }
rcu_read_lock();
- list_for_each_entry_from(e, &map->members, list) {
- i++;
- if (SET_WITH_TIMEOUT(set) &&
- ip_set_timeout_expired(ext_timeout(e, set)))
+ list_for_each_entry_rcu(e, &map->members, list) {
+ if (i < first ||
+ (SET_WITH_TIMEOUT(set) &&
+ ip_set_timeout_expired(ext_timeout(e, set)))) {
+ i++;
continue;
+ }
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
- if (!nested) {
- if (i == first) {
- nla_nest_cancel(skb, atd);
- ret = -EMSGSIZE;
- goto out;
- }
+ if (!nested)
goto nla_put_failure;
- }
if (nla_put_string(skb, IPSET_ATTR_NAME,
ip_set_name_byindex(map->net, e->id)))
goto nla_put_failure;
if (ip_set_put_extensions(skb, set, e, true))
goto nla_put_failure;
ipset_nest_end(skb, nested);
+ i++;
}
ipset_nest_end(skb, atd);
@@ -520,10 +513,12 @@ list_set_list(const struct ip_set *set,
nla_put_failure:
nla_nest_cancel(skb, nested);
if (unlikely(i == first)) {
+ nla_nest_cancel(skb, atd);
cb->args[IPSET_CB_ARG0] = 0;
ret = -EMSGSIZE;
+ } else {
+ cb->args[IPSET_CB_ARG0] = i;
}
- cb->args[IPSET_CB_ARG0] = i - 1;
ipset_nest_end(skb, atd);
out:
rcu_read_unlock();
@@ -618,7 +613,8 @@ list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
size = IP_SET_LIST_MIN_SIZE;
set->variant = &set_variant;
- set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem));
+ set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem),
+ __alignof__(struct set_elem));
if (!init_list_set(net, set, size))
return -ENOMEM;
if (tb[IPSET_ATTR_TIMEOUT]) {
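
The list:set deletion path above now defers the actual free to an RCU callback. Because the callback only receives the rcu_head, container_of() recovers the element, and the new e->set back-pointer is what lets the callback reach the owning set to drop the member-set reference and run extension destructors after the grace period. Roughly (the helper name below is illustrative):

    static void free_elem_rcu(struct rcu_head *rcu)
    {
        struct set_elem *e = container_of(rcu, struct set_elem, rcu);

        /* the real callback also drops the reference on the member set
         * via ip_set_put_byindex() before freeing
         */
        ip_set_ext_destroy(e->set, e);  /* per-extension destructors, if any */
        kfree(e);
    }

    /* deletion: unlink under the nfnl mutex, free after a grace period */
    list_del_rcu(&e->list);
    call_rcu(&e->rcu, free_elem_rcu);
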
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 0328f7250693..299edc6add5a 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -605,17 +605,13 @@ static const struct file_operations ip_vs_app_fops = {
int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
{
- struct net *net = ipvs->net;
-
INIT_LIST_HEAD(&ipvs->app_list);
- proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops);
+ proc_create("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_fops);
return 0;
}
void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
{
- struct net *net = ipvs->net;
-
unregister_ip_vs_app(ipvs, NULL /* all */);
- remove_proc_entry("ip_vs_app", net->proc_net);
+ remove_proc_entry("ip_vs_app", ipvs->net->proc_net);
}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 1e24fff53e4b..b9a4082afa3a 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1089,6 +1089,7 @@ static inline bool is_new_conn_expected(const struct ip_vs_conn *cp,
switch (cp->protocol) {
case IPPROTO_TCP:
return (cp->state == IP_VS_TCP_S_TIME_WAIT) ||
+ (cp->state == IP_VS_TCP_S_CLOSE) ||
((conn_reuse_mode & 2) &&
(cp->state == IP_VS_TCP_S_FIN_WAIT) &&
(cp->flags & IP_VS_CONN_F_NOOUTPUT));
@@ -1176,6 +1177,7 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
+ struct sock *sk;
EnterFunction(11);
@@ -1183,13 +1185,12 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
if (skb->ipvs_property)
return NF_ACCEPT;
+ sk = skb_to_full_sk(skb);
/* Bad... Do not break raw sockets */
- if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+ if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- struct sock *sk = skb->sk;
- struct inet_sock *inet = inet_sk(skb->sk);
- if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+ if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
return NF_ACCEPT;
}
@@ -1681,6 +1682,7 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
struct ip_vs_conn *cp;
int ret, pkts;
int conn_reuse_mode;
+ struct sock *sk;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
@@ -1708,12 +1710,11 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
ip_vs_fill_iph_skb(af, skb, false, &iph);
/* Bad... Do not break raw sockets */
- if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+ sk = skb_to_full_sk(skb);
+ if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- struct sock *sk = skb->sk;
- struct inet_sock *inet = inet_sk(skb->sk);
- if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+ if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
return NF_ACCEPT;
}
@@ -1757,15 +1758,34 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
cp = pp->conn_in_get(ipvs, af, skb, &iph);
conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
- if (conn_reuse_mode && !iph.fragoffs &&
- is_new_conn(skb, &iph) && cp &&
- ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
- unlikely(!atomic_read(&cp->dest->weight))) ||
- unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) {
- if (!atomic_read(&cp->n_control))
- ip_vs_conn_expire_now(cp);
- __ip_vs_conn_put(cp);
- cp = NULL;
+ if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+ bool uses_ct = false, resched = false;
+
+ if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
+ unlikely(!atomic_read(&cp->dest->weight))) {
+ resched = true;
+ uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
+ } else if (is_new_conn_expected(cp, conn_reuse_mode)) {
+ uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
+ if (!atomic_read(&cp->n_control)) {
+ resched = true;
+ } else {
+ /* Do not reschedule controlling connection
+ * that uses conntrack while it is still
+ * referenced by controlled connection(s).
+ */
+ resched = !uses_ct;
+ }
+ }
+
+ if (resched) {
+ if (!atomic_read(&cp->n_control))
+ ip_vs_conn_expire_now(cp);
+ __ip_vs_conn_put(cp);
+ if (uses_ct)
+ return NF_DROP;
+ cp = NULL;
+ }
}
if (unlikely(!cp)) {
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index e7c1b052c2a3..404b2a4f4b5b 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1376,8 +1376,6 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
struct ip_vs_pe *old_pe;
struct netns_ipvs *ipvs = svc->ipvs;
- pr_info("%s: enter\n", __func__);
-
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
ipvs->num_services--;
@@ -3947,7 +3945,6 @@ static struct notifier_block ip_vs_dst_notifier = {
int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
{
- struct net *net = ipvs->net;
int i, idx;
/* Initialize rs_table */
@@ -3974,9 +3971,9 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
spin_lock_init(&ipvs->tot_stats.lock);
- proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops);
- proc_create("ip_vs_stats", 0, net->proc_net, &ip_vs_stats_fops);
- proc_create("ip_vs_stats_percpu", 0, net->proc_net,
+ proc_create("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_fops);
+ proc_create("ip_vs_stats", 0, ipvs->net->proc_net, &ip_vs_stats_fops);
+ proc_create("ip_vs_stats_percpu", 0, ipvs->net->proc_net,
&ip_vs_stats_percpu_fops);
if (ip_vs_control_net_init_sysctl(ipvs))
@@ -3991,13 +3988,11 @@ err:
void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
{
- struct net *net = ipvs->net;
-
ip_vs_trash_cleanup(ipvs);
ip_vs_control_net_cleanup_sysctl(ipvs);
- remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
- remove_proc_entry("ip_vs_stats", net->proc_net);
- remove_proc_entry("ip_vs", net->proc_net);
+ remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
+ remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
+ remove_proc_entry("ip_vs", ipvs->net->proc_net);
free_percpu(ipvs->tot_stats.cpustats);
}
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 1b8d594e493a..0a6eb5c0d9e9 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -70,10 +70,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
const char *dptr;
int retc;
- ip_vs_fill_iph_skb(p->af, skb, false, &iph);
+ retc = ip_vs_fill_iph_skb(p->af, skb, false, &iph);
/* Only useful with UDP */
- if (iph.protocol != IPPROTO_UDP)
+ if (!retc || iph.protocol != IPPROTO_UDP)
return -EINVAL;
/* todo: IPv6 fragments:
* I think this only should be done for the first fragment. /HS
@@ -88,7 +88,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
dptr = skb->data + dataoff;
datalen = skb->len - dataoff;
- if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen))
+ if (get_callid(dptr, 0, datalen, &matchoff, &matchlen))
return -EINVAL;
/* N.B: pe_data is only set on success,
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 010ddeec135f..d952d67f904d 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -169,7 +169,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
/* Only update csum if we really have to */
if (sctph->dest != cp->dport || payload_csum ||
(skb->ip_summed == CHECKSUM_PARTIAL &&
- !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) {
+ !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) {
sctph->dest = cp->dport;
sctp_nat_csum(skb, sctph, sctphoff);
} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 3264cb49b333..dc196a0f501d 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -531,8 +531,6 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
if (ret == NF_ACCEPT) {
nf_reset(skb);
skb_forward_csum(skb);
- if (!skb->sk)
- skb_sender_cpu_clear(skb);
}
return ret;
}
@@ -573,8 +571,6 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
if (!local) {
skb_forward_csum(skb);
- if (!skb->sk)
- skb_sender_cpu_clear(skb);
NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output);
} else
@@ -595,8 +591,6 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
if (!local) {
ip_vs_drop_early_demux_sk(skb);
skb_forward_csum(skb);
- if (!skb->sk)
- skb_sender_cpu_clear(skb);
NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output);
} else
@@ -1019,8 +1013,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
if (IS_ERR(skb))
goto tx_error;
- skb = iptunnel_handle_offloads(
- skb, false, __tun_gso_type_mask(AF_INET, cp->af));
+ skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af));
if (IS_ERR(skb))
goto tx_error;
@@ -1112,8 +1105,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
if (IS_ERR(skb))
goto tx_error;
- skb = iptunnel_handle_offloads(
- skb, false, __tun_gso_type_mask(AF_INET6, cp->af));
+ skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af));
if (IS_ERR(skb))
goto tx_error;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3cb3cb831591..afde5f5e728a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -66,6 +66,20 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
+static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
+static __read_mostly bool nf_conntrack_locks_all;
+
+void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
+{
+ spin_lock(lock);
+ while (unlikely(nf_conntrack_locks_all)) {
+ spin_unlock(lock);
+ spin_unlock_wait(&nf_conntrack_locks_all_lock);
+ spin_lock(lock);
+ }
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_lock);
+
static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
{
h1 %= CONNTRACK_LOCKS;
@@ -82,12 +96,12 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
h1 %= CONNTRACK_LOCKS;
h2 %= CONNTRACK_LOCKS;
if (h1 <= h2) {
- spin_lock(&nf_conntrack_locks[h1]);
+ nf_conntrack_lock(&nf_conntrack_locks[h1]);
if (h1 != h2)
spin_lock_nested(&nf_conntrack_locks[h2],
SINGLE_DEPTH_NESTING);
} else {
- spin_lock(&nf_conntrack_locks[h2]);
+ nf_conntrack_lock(&nf_conntrack_locks[h2]);
spin_lock_nested(&nf_conntrack_locks[h1],
SINGLE_DEPTH_NESTING);
}
@@ -102,16 +116,18 @@ static void nf_conntrack_all_lock(void)
{
int i;
- for (i = 0; i < CONNTRACK_LOCKS; i++)
- spin_lock_nested(&nf_conntrack_locks[i], i);
+ spin_lock(&nf_conntrack_locks_all_lock);
+ nf_conntrack_locks_all = true;
+
+ for (i = 0; i < CONNTRACK_LOCKS; i++) {
+ spin_unlock_wait(&nf_conntrack_locks[i]);
+ }
}
static void nf_conntrack_all_unlock(void)
{
- int i;
-
- for (i = 0; i < CONNTRACK_LOCKS; i++)
- spin_unlock(&nf_conntrack_locks[i]);
+ nf_conntrack_locks_all = false;
+ spin_unlock(&nf_conntrack_locks_all_lock);
}
unsigned int nf_conntrack_htable_size __read_mostly;
@@ -757,7 +773,7 @@ restart:
hash = hash_bucket(_hash, net);
for (; i < net->ct.htable_size; i++) {
lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
- spin_lock(lockp);
+ nf_conntrack_lock(lockp);
if (read_seqcount_retry(&net->ct.generation, sequence)) {
spin_unlock(lockp);
goto restart;
@@ -1382,7 +1398,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
for (; *bucket < net->ct.htable_size; (*bucket)++) {
lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
local_bh_disable();
- spin_lock(lockp);
+ nf_conntrack_lock(lockp);
if (*bucket < net->ct.htable_size) {
hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
@@ -1394,6 +1410,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
}
spin_unlock(lockp);
local_bh_enable();
+ cond_resched();
}
for_each_possible_cpu(cpu) {
@@ -1406,6 +1423,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
set_bit(IPS_DYING_BIT, &ct->status);
}
spin_unlock_bh(&pcpu->lock);
+ cond_resched();
}
return NULL;
found:
@@ -1422,6 +1440,8 @@ void nf_ct_iterate_cleanup(struct net *net,
struct nf_conn *ct;
unsigned int bucket = 0;
+ might_sleep();
+
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
/* Time to push up daises... */
if (del_timer(&ct->timeout))
@@ -1430,6 +1450,7 @@ void nf_ct_iterate_cleanup(struct net *net,
/* ... else the timer will get him soon. */
nf_ct_put(ct);
+ cond_resched();
}
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
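
The conntrack locking change above replaces "take all CONNTRACK_LOCKS bucket locks" with one global spinlock plus a flag: per-bucket users go through nf_conntrack_lock(), which backs off while nf_conntrack_locks_all is set, and the table-wide side sets the flag and then waits for every bucket lock to drain. A sketch of how the two sides pair up (function names are illustrative; the real callers are the hash walkers patched above):

    /* per-bucket side: ordinary lookups and per-chain walks */
    static void visit_bucket(unsigned int hash)
    {
        spinlock_t *lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];

        nf_conntrack_lock(lockp);       /* spins out of the global phase */
        /* ... walk or modify this hash chain ... */
        spin_unlock(lockp);
    }

    /* table-wide side: operations that must see a quiescent table */
    static void visit_whole_table(void)
    {
        nf_conntrack_all_lock();        /* set locks_all, drain bucket locks */
        /* ... e.g. flush or rebuild the whole table ... */
        nf_conntrack_all_unlock();
    }
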
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index acf5c7b3f378..278927ab0948 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -596,11 +596,18 @@ static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct proc_dir_entry *proc;
+ kuid_t root_uid;
+ kgid_t root_gid;
proc = proc_create("nf_conntrack_expect", 0440, net->proc_net,
&exp_file_ops);
if (!proc)
return -ENOMEM;
+
+ root_uid = make_kuid(net->user_ns, 0);
+ root_gid = make_kgid(net->user_ns, 0);
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
return 0;
}
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index b666959f17c0..883c691ec8d0 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -10,6 +10,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/netfilter.h>
@@ -505,11 +507,11 @@ skip_nl_seq:
different IP address. Simply don't record it for
NAT. */
if (cmd.l3num == PF_INET) {
- pr_debug("conntrack_ftp: NOT RECORDING: %pI4 != %pI4\n",
+ pr_debug("NOT RECORDING: %pI4 != %pI4\n",
&cmd.u3.ip,
&ct->tuplehash[dir].tuple.src.u3.ip);
} else {
- pr_debug("conntrack_ftp: NOT RECORDING: %pI6 != %pI6\n",
+ pr_debug("NOT RECORDING: %pI6 != %pI6\n",
cmd.u3.ip6,
ct->tuplehash[dir].tuple.src.u3.ip6);
}
@@ -586,8 +588,7 @@ static void nf_conntrack_ftp_fini(void)
if (ftp[i][j].me == NULL)
continue;
- pr_debug("nf_ct_ftp: unregistering helper for pf: %d "
- "port: %d\n",
+ pr_debug("unregistering helper for pf: %d port: %d\n",
ftp[i][j].tuple.src.l3num, ports[i]);
nf_conntrack_helper_unregister(&ftp[i][j]);
}
@@ -625,14 +626,12 @@ static int __init nf_conntrack_ftp_init(void)
else
sprintf(ftp[i][j].name, "ftp-%d", ports[i]);
- pr_debug("nf_ct_ftp: registering helper for pf: %d "
- "port: %d\n",
+ pr_debug("registering helper for pf: %d port: %d\n",
ftp[i][j].tuple.src.l3num, ports[i]);
ret = nf_conntrack_helper_register(&ftp[i][j]);
if (ret) {
- printk(KERN_ERR "nf_ct_ftp: failed to register"
- " helper for pf: %d port: %d\n",
- ftp[i][j].tuple.src.l3num, ports[i]);
+ pr_err("failed to register helper for pf: %d port: %d\n",
+ ftp[i][j].tuple.src.l3num, ports[i]);
nf_conntrack_ftp_fini();
return ret;
}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index bd9d31537905..3b40ec575cd5 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -425,7 +425,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
}
local_bh_disable();
for (i = 0; i < net->ct.htable_size; i++) {
- spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+ nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
if (i < net->ct.htable_size) {
hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
unhelp(h, me);
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 0fd2976db7ee..8b6da2719600 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/skbuff.h>
@@ -237,7 +239,7 @@ static int __init nf_conntrack_irc_init(void)
int i, ret;
if (max_dcc_channels < 1) {
- printk(KERN_ERR "nf_ct_irc: max_dcc_channels must not be zero\n");
+ pr_err("max_dcc_channels must not be zero\n");
return -EINVAL;
}
@@ -267,8 +269,7 @@ static int __init nf_conntrack_irc_init(void)
ret = nf_conntrack_helper_register(&irc[i]);
if (ret) {
- printk(KERN_ERR "nf_ct_irc: failed to register helper "
- "for pf: %u port: %u\n",
+ pr_err("failed to register helper for pf: %u port: %u\n",
irc[i].tuple.src.l3num, ports[i]);
nf_conntrack_irc_fini();
return ret;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 9f5272968abb..355e8552fd5b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -840,7 +840,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
restart:
lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
- spin_lock(lockp);
+ nf_conntrack_lock(lockp);
if (cb->args[0] >= net->ct.htable_size) {
spin_unlock(lockp);
goto out;
@@ -1113,12 +1113,11 @@ static int ctnetlink_flush_conntrack(struct net *net,
return 0;
}
-static int
-ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
- struct net *net = sock_net(ctnl);
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
struct nf_conn *ct;
@@ -1168,12 +1167,11 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
return 0;
}
-static int
-ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
- struct net *net = sock_net(ctnl);
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
struct nf_conn *ct;
@@ -1330,10 +1328,10 @@ ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb)
return ctnetlink_dump_list(skb, cb, true);
}
-static int
-ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_get_ct_dying(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -1352,10 +1350,10 @@ ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb)
return ctnetlink_dump_list(skb, cb, false);
}
-static int
-ctnetlink_get_ct_unconfirmed(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_get_ct_unconfirmed(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -1865,12 +1863,11 @@ err1:
return ERR_PTR(err);
}
-static int
-ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
- struct net *net = sock_net(ctnl);
struct nf_conntrack_tuple otuple, rtuple;
struct nf_conntrack_tuple_hash *h = NULL;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -2034,10 +2031,10 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-static int
-ctnetlink_stat_ct_cpu(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_stat_ct_cpu(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -2080,10 +2077,9 @@ nlmsg_failure:
return -1;
}
-static int
-ctnetlink_stat_ct(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_stat_ct(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
struct sk_buff *skb2;
int err;
@@ -2729,12 +2725,12 @@ out:
return skb->len;
}
-static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
+static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
int err;
- struct net *net = sock_net(ctnl);
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_tuple tuple;
@@ -2768,12 +2764,10 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
return err;
}
-static int
-ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
- struct net *net = sock_net(ctnl);
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
struct sk_buff *skb2;
@@ -2784,7 +2778,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
if (cda[CTA_EXPECT_MASTER])
- return ctnetlink_dump_exp_ct(ctnl, skb, nlh, cda);
+ return ctnetlink_dump_exp_ct(net, ctnl, skb, nlh, cda);
else {
struct netlink_dump_control c = {
.dump = ctnetlink_exp_dump_table,
@@ -2850,12 +2844,10 @@ out:
return err == -EAGAIN ? -ENOBUFS : err;
}
-static int
-ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_del_expect(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
- struct net *net = sock_net(ctnl);
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple tuple;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -3136,12 +3128,10 @@ err_ct:
return err;
}
-static int
-ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_new_expect(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
- struct net *net = sock_net(ctnl);
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -3242,10 +3232,10 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-static int
-ctnetlink_stat_exp_cpu(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int ctnetlink_stat_exp_cpu(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
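The ctnetlink hunks above all make the same change: every nfnetlink handler now receives the relevant struct net as its first argument instead of reconstructing it with sock_net(ctnl). A minimal before/after sketch of that convention, assuming hypothetical names example_handler_old/new and do_something that are not part of the patch:

#include <linux/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>

static int do_something(struct net *net, const struct nlattr * const cda[])
{
        return 0;       /* stub for illustration only */
}

/* Old convention: each handler recovered the netns from the socket. */
static int example_handler_old(struct sock *nfnl, struct sk_buff *skb,
                               const struct nlmsghdr *nlh,
                               const struct nlattr * const cda[])
{
        struct net *net = sock_net(nfnl);

        return do_something(net, cda);
}

/* New convention: nfnetlink passes the netns explicitly. */
static int example_handler_new(struct net *net, struct sock *nfnl,
                               struct sk_buff *skb,
                               const struct nlmsghdr *nlh,
                               const struct nlattr * const cda[])
{
        return do_something(net, cda);
}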
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index 4a2134fd3fcb..7523a575f6d1 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -17,6 +17,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/netfilter.h>
@@ -120,14 +122,14 @@ static int help(struct sk_buff *skb,
ct_sane_info->state = SANE_STATE_NORMAL;
if (datalen < sizeof(struct sane_reply_net_start)) {
- pr_debug("nf_ct_sane: NET_START reply too short\n");
+ pr_debug("NET_START reply too short\n");
goto out;
}
reply = sb_ptr;
if (reply->status != htonl(SANE_STATUS_SUCCESS)) {
/* saned refused the command */
- pr_debug("nf_ct_sane: unsuccessful SANE_STATUS = %u\n",
+ pr_debug("unsuccessful SANE_STATUS = %u\n",
ntohl(reply->status));
goto out;
}
@@ -148,7 +150,7 @@ static int help(struct sk_buff *skb,
&tuple->src.u3, &tuple->dst.u3,
IPPROTO_TCP, NULL, &reply->port);
- pr_debug("nf_ct_sane: expect: ");
+ pr_debug("expect: ");
nf_ct_dump_tuple(&exp->tuple);
/* Can't expect this? Best to drop packet now. */
@@ -178,8 +180,7 @@ static void nf_conntrack_sane_fini(void)
for (i = 0; i < ports_c; i++) {
for (j = 0; j < 2; j++) {
- pr_debug("nf_ct_sane: unregistering helper for pf: %d "
- "port: %d\n",
+ pr_debug("unregistering helper for pf: %d port: %d\n",
sane[i][j].tuple.src.l3num, ports[i]);
nf_conntrack_helper_unregister(&sane[i][j]);
}
@@ -216,14 +217,12 @@ static int __init nf_conntrack_sane_init(void)
else
sprintf(sane[i][j].name, "sane-%d", ports[i]);
- pr_debug("nf_ct_sane: registering helper for pf: %d "
- "port: %d\n",
+ pr_debug("registering helper for pf: %d port: %d\n",
sane[i][j].tuple.src.l3num, ports[i]);
ret = nf_conntrack_helper_register(&sane[i][j]);
if (ret) {
- printk(KERN_ERR "nf_ct_sane: failed to "
- "register helper for pf: %d port: %d\n",
- sane[i][j].tuple.src.l3num, ports[i]);
+ pr_err("failed to register helper for pf: %d port: %d\n",
+ sane[i][j].tuple.src.l3num, ports[i]);
nf_conntrack_sane_fini();
return ret;
}
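The sane/sip/tftp helper hunks rely on pr_fmt(): defining it before the includes makes every pr_debug()/pr_err() in the file prefix its output with the module name, so the hand-written "nf_ct_sane: " style prefixes can be dropped from each call site. A small standalone sketch of the mechanism, assuming a module that happens to be named pr_fmt_demo:

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt     /* must precede the printk.h include chain */

#include <linux/module.h>
#include <linux/printk.h>

static int __init pr_fmt_demo_init(void)
{
        /* Logs "pr_fmt_demo: registering helper for port 9876" */
        pr_info("registering helper for port %d\n", 9876);
        return 0;
}
module_init(pr_fmt_demo_init);

static void __exit pr_fmt_demo_exit(void) { }
module_exit(pr_fmt_demo_exit);

MODULE_LICENSE("GPL");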
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 885b4aba3695..3e06402739e0 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -10,6 +10,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/skbuff.h>
@@ -1665,8 +1667,7 @@ static int __init nf_conntrack_sip_init(void)
ret = nf_conntrack_helper_register(&sip[i][j]);
if (ret) {
- printk(KERN_ERR "nf_ct_sip: failed to register"
- " helper for pf: %u port: %u\n",
+ pr_err("failed to register helper for pf: %u port: %u\n",
sip[i][j].tuple.src.l3num, ports[i]);
nf_conntrack_sip_fini();
return ret;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 1fb3cacc04e1..0f1a45bcacb2 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -392,11 +392,18 @@ static const struct file_operations ct_cpu_seq_fops = {
static int nf_conntrack_standalone_init_proc(struct net *net)
{
struct proc_dir_entry *pde;
+ kuid_t root_uid;
+ kgid_t root_gid;
pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops);
if (!pde)
goto out_nf_conntrack;
+ root_uid = make_kuid(net->user_ns, 0);
+ root_gid = make_kgid(net->user_ns, 0);
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(pde, root_uid, root_gid);
+
pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat,
&ct_cpu_seq_fops);
if (!pde)
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index e68ab4fbd71f..36f964066461 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -5,6 +5,8 @@
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/in.h>
@@ -138,9 +140,8 @@ static int __init nf_conntrack_tftp_init(void)
ret = nf_conntrack_helper_register(&tftp[i][j]);
if (ret) {
- printk(KERN_ERR "nf_ct_tftp: failed to register"
- " helper for pf: %u port: %u\n",
- tftp[i][j].tuple.src.l3num, ports[i]);
+ pr_err("failed to register helper for pf: %u port: %u\n",
+ tftp[i][j].tuple.src.l3num, ports[i]);
nf_conntrack_tftp_fini();
return ret;
}
diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c
index 93da609d9d29..26e742006c48 100644
--- a/net/netfilter/nf_conntrack_timeout.c
+++ b/net/netfilter/nf_conntrack_timeout.c
@@ -25,7 +25,7 @@
#include <net/netfilter/nf_conntrack_timeout.h>
struct ctnl_timeout *
-(*nf_ct_timeout_find_get_hook)(const char *name) __read_mostly;
+(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name) __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook);
void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout) __read_mostly;
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
new file mode 100644
index 000000000000..7ec69723940f
--- /dev/null
+++ b/net/netfilter/nf_dup_netdev.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif)
+{
+ struct net_device *dev;
+ struct sk_buff *skb;
+
+ dev = dev_get_by_index_rcu(pkt->net, oif);
+ if (dev == NULL)
+ return;
+
+ skb = skb_clone(pkt->skb, GFP_ATOMIC);
+ if (skb == NULL)
+ return;
+
+ if (skb_mac_header_was_set(skb))
+ skb_push(skb, skb->mac_len);
+
+ skb->dev = dev;
+ dev_queue_xmit(skb);
+}
+EXPORT_SYMBOL_GPL(nf_dup_netdev_egress);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
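The new nf_dup_netdev_egress() helper looks up the output device under RCU, clones pkt->skb and queues the copy for transmission on that device, so callers must run inside an RCU read-side section (netfilter hook context already does). A hedged sketch of how an nft expression's eval routine might call it; the expression name, its private struct and the header providing the declaration are illustrative assumptions, the real consumer is nft_dup_netdev added elsewhere in this series:

#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_dup_netdev.h>        /* assumed location of the nf_dup_netdev_egress() declaration */

struct demo_dup_priv {
        enum nft_registers      sreg_dev:8;     /* register holding the output ifindex */
};

static void demo_dup_eval(const struct nft_expr *expr,
                          struct nft_regs *regs,
                          const struct nft_pktinfo *pkt)
{
        struct demo_dup_priv *priv = nft_expr_priv(expr);
        int oif = regs->data[priv->sreg_dev];

        /* Clones pkt->skb and transmits the copy on oif; original skb continues unchanged. */
        nf_dup_netdev_egress(pkt, oif);
}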
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index 97b75f9bfbcd..d43869879fcf 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -55,7 +55,7 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
rcu_read_lock();
indev = __in_dev_get_rcu(skb->dev);
- if (indev != NULL) {
+ if (indev && indev->ifa_list) {
ifa = indev->ifa_list;
newdst = ifa->ifa_local;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 93cc4737018f..2011977cd79d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -41,6 +41,8 @@ int nft_register_afinfo(struct net *net, struct nft_af_info *afi)
}
EXPORT_SYMBOL_GPL(nft_register_afinfo);
+static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi);
+
/**
* nft_unregister_afinfo - unregister nf_tables address family info
*
@@ -48,9 +50,10 @@ EXPORT_SYMBOL_GPL(nft_register_afinfo);
*
* Unregister the address family for use with nf_tables.
*/
-void nft_unregister_afinfo(struct nft_af_info *afi)
+void nft_unregister_afinfo(struct net *net, struct nft_af_info *afi)
{
nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ __nft_release_afinfo(net, afi);
list_del_rcu(&afi->list);
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
}
@@ -89,6 +92,7 @@ nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
}
static void nft_ctx_init(struct nft_ctx *ctx,
+ struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
struct nft_af_info *afi,
@@ -96,7 +100,7 @@ static void nft_ctx_init(struct nft_ctx *ctx,
struct nft_chain *chain,
const struct nlattr * const *nla)
{
- ctx->net = sock_net(skb->sk);
+ ctx->net = net;
ctx->afi = afi;
ctx->table = table;
ctx->chain = chain;
@@ -127,8 +131,8 @@ static void nft_trans_destroy(struct nft_trans *trans)
kfree(trans);
}
-int nft_register_basechain(struct nft_base_chain *basechain,
- unsigned int hook_nops)
+static int nft_register_basechain(struct nft_base_chain *basechain,
+ unsigned int hook_nops)
{
struct net *net = read_pnet(&basechain->pnet);
@@ -137,10 +141,9 @@ int nft_register_basechain(struct nft_base_chain *basechain,
return nf_register_net_hooks(net, basechain->ops, hook_nops);
}
-EXPORT_SYMBOL_GPL(nft_register_basechain);
-void nft_unregister_basechain(struct nft_base_chain *basechain,
- unsigned int hook_nops)
+static void nft_unregister_basechain(struct nft_base_chain *basechain,
+ unsigned int hook_nops)
{
struct net *net = read_pnet(&basechain->pnet);
@@ -149,7 +152,6 @@ void nft_unregister_basechain(struct nft_base_chain *basechain,
nf_unregister_net_hooks(net, basechain->ops, hook_nops);
}
-EXPORT_SYMBOL_GPL(nft_unregister_basechain);
static int nf_tables_register_hooks(const struct nft_table *table,
struct nft_chain *chain,
@@ -541,15 +543,14 @@ done:
return skb->len;
}
-static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_gettable(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nft_af_info *afi;
const struct nft_table *table;
struct sk_buff *skb2;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
int err;
@@ -672,15 +673,14 @@ err:
return ret;
}
-static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newtable(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nlattr *name;
struct nft_af_info *afi;
struct nft_table *table;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
u32 flags = 0;
struct nft_ctx ctx;
@@ -706,7 +706,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
return nf_tables_updtable(&ctx);
}
@@ -730,7 +730,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
INIT_LIST_HEAD(&table->sets);
table->flags = flags;
- nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
if (err < 0)
goto err3;
@@ -810,18 +810,17 @@ out:
return err;
}
-static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_deltable(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
struct nft_table *table;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
- nft_ctx_init(&ctx, skb, nlh, NULL, NULL, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, NULL, NULL, NULL, nla);
if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL)
return nft_flush(&ctx, family);
@@ -832,8 +831,6 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
if (IS_ERR(table))
return PTR_ERR(table);
- if (table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
ctx.afi = afi;
ctx.table = table;
@@ -1099,8 +1096,8 @@ done:
return skb->len;
}
-static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_getchain(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -1108,7 +1105,6 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
const struct nft_table *table;
const struct nft_chain *chain;
struct sk_buff *skb2;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
int err;
@@ -1221,8 +1217,8 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
}
}
-static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newchain(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -1232,7 +1228,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
struct nft_chain *chain;
struct nft_base_chain *basechain = NULL;
struct nlattr *ha[NFTA_HOOK_MAX + 1];
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct net_device *dev = NULL;
u8 policy = NF_ACCEPT;
@@ -1313,7 +1308,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
return PTR_ERR(stats);
}
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
sizeof(struct nft_trans_chain));
if (trans == NULL) {
@@ -1461,7 +1456,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
if (err < 0)
goto err1;
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
if (err < 0)
goto err2;
@@ -1476,15 +1471,14 @@ err1:
return err;
}
-static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_delchain(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
@@ -1495,18 +1489,14 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
- if (table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
if (IS_ERR(chain))
return PTR_ERR(chain);
- if (chain->flags & NFT_CHAIN_INACTIVE)
- return -ENOENT;
if (chain->use > 0)
return -EBUSY;
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
return nft_delchain(&ctx);
}
@@ -1931,8 +1921,8 @@ done:
return skb->len;
}
-static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_getrule(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -1941,7 +1931,6 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
const struct nft_chain *chain;
const struct nft_rule *rule;
struct sk_buff *skb2;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
int err;
@@ -2010,13 +1999,12 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
static struct nft_expr_info *info;
-static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newrule(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
- struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_chain *chain;
struct nft_rule *rule, *old_rule = NULL;
@@ -2075,7 +2063,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
return PTR_ERR(old_rule);
}
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
n = 0;
size = 0;
@@ -2176,13 +2164,12 @@ err1:
return err;
}
-static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_delrule(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
- struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_chain *chain = NULL;
struct nft_rule *rule;
@@ -2196,8 +2183,6 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
- if (table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
if (nla[NFTA_RULE_CHAIN]) {
chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
@@ -2205,7 +2190,7 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
return PTR_ERR(chain);
}
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
if (chain) {
if (nla[NFTA_RULE_HANDLE]) {
@@ -2338,18 +2323,19 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_ID] = { .type = NLA_U32 },
[NFTA_SET_TIMEOUT] = { .type = NLA_U64 },
[NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 },
+ [NFTA_SET_USERDATA] = { .type = NLA_BINARY,
+ .len = NFT_USERDATA_MAXLEN },
};
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
[NFTA_SET_DESC_SIZE] = { .type = NLA_U32 },
};
-static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
+static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
- struct net *net = sock_net(skb->sk);
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi = NULL;
struct nft_table *table = NULL;
@@ -2367,11 +2353,9 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
- if (table->flags & NFT_TABLE_INACTIVE)
- return -ENOENT;
}
- nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla);
return 0;
}
@@ -2500,6 +2484,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
goto nla_put_failure;
}
+ if (nla_put(skb, NFTA_SET_USERDATA, set->udlen, set->udata))
+ goto nla_put_failure;
+
desc = nla_nest_start(skb, NFTA_SET_DESC);
if (desc == NULL)
goto nla_put_failure;
@@ -2619,8 +2606,8 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb)
return 0;
}
-static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_getset(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nft_set *set;
@@ -2630,7 +2617,7 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
int err;
/* Verify existence before starting dump */
- err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla);
if (err < 0)
return err;
@@ -2693,14 +2680,13 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
return 0;
}
-static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newset(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nft_set_ops *ops;
struct nft_af_info *afi;
- struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_set *set;
struct nft_ctx ctx;
@@ -2710,6 +2696,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
u64 timeout;
u32 ktype, dtype, flags, policy, gc_int;
struct nft_set_desc desc;
+ unsigned char *udata;
+ u16 udlen;
int err;
if (nla[NFTA_SET_TABLE] == NULL ||
@@ -2798,7 +2786,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (IS_ERR(table))
return PTR_ERR(table);
- nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]);
if (IS_ERR(set)) {
@@ -2822,12 +2810,16 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (IS_ERR(ops))
return PTR_ERR(ops);
+ udlen = 0;
+ if (nla[NFTA_SET_USERDATA])
+ udlen = nla_len(nla[NFTA_SET_USERDATA]);
+
size = 0;
if (ops->privsize != NULL)
size = ops->privsize(nla);
err = -ENOMEM;
- set = kzalloc(sizeof(*set) + size, GFP_KERNEL);
+ set = kzalloc(sizeof(*set) + size + udlen, GFP_KERNEL);
if (set == NULL)
goto err1;
@@ -2836,6 +2828,12 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (err < 0)
goto err2;
+ udata = NULL;
+ if (udlen) {
+ udata = set->data + size;
+ nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen);
+ }
+
INIT_LIST_HEAD(&set->bindings);
write_pnet(&set->pnet, net);
set->ops = ops;
@@ -2846,6 +2844,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
set->flags = flags;
set->size = desc.size;
set->policy = policy;
+ set->udlen = udlen;
+ set->udata = udata;
set->timeout = timeout;
set->gc_int = gc_int;
@@ -2882,8 +2882,8 @@ static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set
nft_set_destroy(set);
}
-static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_delset(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -2896,15 +2896,13 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_SET_TABLE] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla);
if (err < 0)
return err;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
if (IS_ERR(set))
return PTR_ERR(set);
- if (set->flags & NFT_SET_INACTIVE)
- return -ENOENT;
if (!list_empty(&set->bindings))
return -EBUSY;
@@ -3024,16 +3022,14 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
[NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
};
-static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
+static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- bool trans)
+ const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
struct nft_table *table;
- struct net *net = sock_net(skb->sk);
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
if (IS_ERR(afi))
@@ -3042,10 +3038,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
- if (!trans && (table->flags & NFT_TABLE_INACTIVE))
- return -ENOENT;
- nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla);
return 0;
}
@@ -3135,6 +3129,7 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = sock_net(skb->sk);
const struct nft_set *set;
struct nft_set_dump_args args;
struct nft_ctx ctx;
@@ -3150,10 +3145,12 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
if (err < 0)
return err;
- err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla,
- false);
+ err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh,
+ (void *)nla);
if (err < 0)
return err;
+ if (ctx.table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
if (IS_ERR(set))
@@ -3208,17 +3205,19 @@ nla_put_failure:
return -ENOSPC;
}
-static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nft_set *set;
struct nft_ctx ctx;
int err;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla);
if (err < 0)
return err;
+ if (ctx.table->flags & NFT_TABLE_INACTIVE)
+ return -ENOENT;
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
if (IS_ERR(set))
@@ -3528,11 +3527,10 @@ err1:
return err;
}
-static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
- struct net *net = sock_net(skb->sk);
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
@@ -3541,7 +3539,7 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla);
if (err < 0)
return err;
@@ -3623,8 +3621,8 @@ err1:
return err;
}
-static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nlattr *attr;
@@ -3635,7 +3633,7 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla);
if (err < 0)
return err;
@@ -3739,11 +3737,10 @@ err:
return err;
}
-static int nf_tables_getgen(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_getgen(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
- struct net *net = sock_net(skb->sk);
struct sk_buff *skb2;
int err;
@@ -3887,9 +3884,8 @@ static void nf_tables_commit_release(struct nft_trans *trans)
kfree(trans);
}
-static int nf_tables_commit(struct sk_buff *skb)
+static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
- struct net *net = sock_net(skb->sk);
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
@@ -4024,13 +4020,13 @@ static void nf_tables_abort_release(struct nft_trans *trans)
kfree(trans);
}
-static int nf_tables_abort(struct sk_buff *skb)
+static int nf_tables_abort(struct net *net, struct sk_buff *skb)
{
- struct net *net = sock_net(skb->sk);
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
- list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list,
+ list) {
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
@@ -4446,22 +4442,22 @@ static void nft_verdict_uninit(const struct nft_data *data)
}
}
-static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data)
+int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v)
{
struct nlattr *nest;
- nest = nla_nest_start(skb, NFTA_DATA_VERDICT);
+ nest = nla_nest_start(skb, type);
if (!nest)
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict.code)))
+ if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(v->code)))
goto nla_put_failure;
- switch (data->verdict.code) {
+ switch (v->code) {
case NFT_JUMP:
case NFT_GOTO:
if (nla_put_string(skb, NFTA_VERDICT_CHAIN,
- data->verdict.chain->name))
+ v->chain->name))
goto nla_put_failure;
}
nla_nest_end(skb, nest);
@@ -4572,7 +4568,7 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
err = nft_value_dump(skb, data, len);
break;
case NFT_DATA_VERDICT:
- err = nft_verdict_dump(skb, data);
+ err = nft_verdict_dump(skb, NFTA_DATA_VERDICT, &data->verdict);
break;
default:
err = -EINVAL;
@@ -4584,7 +4580,7 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
}
EXPORT_SYMBOL_GPL(nft_data_dump);
-static int nf_tables_init_net(struct net *net)
+static int __net_init nf_tables_init_net(struct net *net)
{
INIT_LIST_HEAD(&net->nft.af_info);
INIT_LIST_HEAD(&net->nft.commit_list);
@@ -4592,6 +4588,67 @@ static int nf_tables_init_net(struct net *net)
return 0;
}
+int __nft_release_basechain(struct nft_ctx *ctx)
+{
+ struct nft_rule *rule, *nr;
+
+ BUG_ON(!(ctx->chain->flags & NFT_BASE_CHAIN));
+
+ nf_tables_unregister_hooks(ctx->chain->table, ctx->chain,
+ ctx->afi->nops);
+ list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
+ list_del(&rule->list);
+ ctx->chain->use--;
+ nf_tables_rule_destroy(ctx, rule);
+ }
+ list_del(&ctx->chain->list);
+ ctx->table->use--;
+ nf_tables_chain_destroy(ctx->chain);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__nft_release_basechain);
+
+/* Called by nft_unregister_afinfo() from __net_exit path, nfnl_lock is held. */
+static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
+{
+ struct nft_table *table, *nt;
+ struct nft_chain *chain, *nc;
+ struct nft_rule *rule, *nr;
+ struct nft_set *set, *ns;
+ struct nft_ctx ctx = {
+ .net = net,
+ .afi = afi,
+ };
+
+ list_for_each_entry_safe(table, nt, &afi->tables, list) {
+ list_for_each_entry(chain, &table->chains, list)
+ nf_tables_unregister_hooks(table, chain, afi->nops);
+ /* No packets are walking on these chains anymore. */
+ ctx.table = table;
+ list_for_each_entry(chain, &table->chains, list) {
+ ctx.chain = chain;
+ list_for_each_entry_safe(rule, nr, &chain->rules, list) {
+ list_del(&rule->list);
+ chain->use--;
+ nf_tables_rule_destroy(&ctx, rule);
+ }
+ }
+ list_for_each_entry_safe(set, ns, &table->sets, list) {
+ list_del(&set->list);
+ table->use--;
+ nft_set_destroy(set);
+ }
+ list_for_each_entry_safe(chain, nc, &table->chains, list) {
+ list_del(&chain->list);
+ table->use--;
+ nf_tables_chain_destroy(chain);
+ }
+ list_del(&table->list);
+ nf_tables_table_destroy(&ctx);
+ }
+}
+
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
};
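Besides the netns plumbing and the afinfo release path, the nf_tables_api.c hunks add NFTA_SET_USERDATA: nf_tables_newset() extends the single kzalloc() to cover the set structure, the ops-private area and the user data, then points set->udata just past the private region. A minimal userspace C sketch of that single-allocation, trailing-data layout (names are illustrative, not kernel API):

#include <stdlib.h>
#include <string.h>

struct demo_set {
        unsigned short  udlen;          /* length of trailing user data */
        unsigned char   *udata;         /* points into the same allocation */
        unsigned char   data[];         /* private area first, user data behind it */
};

static struct demo_set *demo_set_alloc(size_t privsize,
                                       const void *ud, unsigned short udlen)
{
        /* One allocation for the struct, the private area and the user data. */
        struct demo_set *set = calloc(1, sizeof(*set) + privsize + udlen);

        if (!set)
                return NULL;
        set->udlen = udlen;
        if (udlen) {
                set->udata = set->data + privsize;
                memcpy(set->udata, ud, udlen);
        }
        return set;
}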
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index f3695a497408..e9f8dffcc244 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -16,22 +16,17 @@
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
+#include <linux/static_key.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_log.h>
-enum nft_trace {
- NFT_TRACE_RULE,
- NFT_TRACE_RETURN,
- NFT_TRACE_POLICY,
-};
-
-static const char *const comments[] = {
- [NFT_TRACE_RULE] = "rule",
- [NFT_TRACE_RETURN] = "return",
- [NFT_TRACE_POLICY] = "policy",
+static const char *const comments[__NFT_TRACETYPE_MAX] = {
+ [NFT_TRACETYPE_POLICY] = "policy",
+ [NFT_TRACETYPE_RETURN] = "return",
+ [NFT_TRACETYPE_RULE] = "rule",
};
static struct nf_loginfo trace_loginfo = {
@@ -44,22 +39,36 @@ static struct nf_loginfo trace_loginfo = {
},
};
-static void __nft_trace_packet(const struct nft_pktinfo *pkt,
- const struct nft_chain *chain,
- int rulenum, enum nft_trace type)
+static noinline void __nft_trace_packet(struct nft_traceinfo *info,
+ const struct nft_chain *chain,
+ int rulenum, enum nft_trace_types type)
{
+ const struct nft_pktinfo *pkt = info->pkt;
+
+ if (!info->trace || !pkt->skb->nf_trace)
+ return;
+
+ info->chain = chain;
+ info->type = type;
+
+ nft_trace_notify(info);
+
nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in,
pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
chain->table->name, chain->name, comments[type],
rulenum);
}
-static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
+static inline void nft_trace_packet(struct nft_traceinfo *info,
const struct nft_chain *chain,
- int rulenum, enum nft_trace type)
+ const struct nft_rule *rule,
+ int rulenum,
+ enum nft_trace_types type)
{
- if (unlikely(pkt->skb->nf_trace))
- __nft_trace_packet(pkt, chain, rulenum, type);
+ if (static_branch_unlikely(&nft_trace_enabled)) {
+ info->rule = rule;
+ __nft_trace_packet(info, chain, rulenum, type);
+ }
}
static void nft_cmp_fast_eval(const struct nft_expr *expr,
@@ -121,7 +130,11 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
struct nft_stats *stats;
int rulenum;
unsigned int gencursor = nft_genmask_cur(net);
+ struct nft_traceinfo info;
+ info.trace = false;
+ if (static_branch_unlikely(&nft_trace_enabled))
+ nft_trace_init(&info, pkt, &regs.verdict, basechain);
do_chain:
rulenum = 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
@@ -151,7 +164,8 @@ next_rule:
regs.verdict.code = NFT_CONTINUE;
continue;
case NFT_CONTINUE:
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ nft_trace_packet(&info, chain, rule,
+ rulenum, NFT_TRACETYPE_RULE);
continue;
}
break;
@@ -161,7 +175,8 @@ next_rule:
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ nft_trace_packet(&info, chain, rule,
+ rulenum, NFT_TRACETYPE_RULE);
return regs.verdict.code;
}
@@ -174,7 +189,8 @@ next_rule:
stackptr++;
/* fall through */
case NFT_GOTO:
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ nft_trace_packet(&info, chain, rule,
+ rulenum, NFT_TRACETYPE_RULE);
chain = regs.verdict.chain;
goto do_chain;
@@ -182,7 +198,8 @@ next_rule:
rulenum++;
/* fall through */
case NFT_RETURN:
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
+ nft_trace_packet(&info, chain, rule,
+ rulenum, NFT_TRACETYPE_RETURN);
break;
default:
WARN_ON(1);
@@ -196,7 +213,8 @@ next_rule:
goto next_rule;
}
- nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
+ nft_trace_packet(&info, basechain, NULL, -1,
+ NFT_TRACETYPE_POLICY);
rcu_read_lock_bh();
stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
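Tracing in nft_do_chain() is now gated by the nft_trace_enabled static key, so when nobody listens the per-packet cost is a patched-out branch rather than a test of skb->nf_trace on every rule. A minimal sketch of the static-key pattern; the enable/disable call sites below are illustrative only (in the patch the key is expected to be toggled as NFNLGRP_NFTRACE listeners come and go):

#include <linux/static_key.h>
#include <linux/printk.h>

DEFINE_STATIC_KEY_FALSE(demo_trace_enabled);

static void demo_hot_path(void)
{
        /* Compiles to a straight fall-through while the key is off. */
        if (static_branch_unlikely(&demo_trace_enabled))
                pr_info("tracing active\n");
}

static void demo_listener_added(void)
{
        static_branch_inc(&demo_trace_enabled);         /* reference-counted enable */
}

static void demo_listener_removed(void)
{
        static_branch_dec(&demo_trace_enabled);
}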
diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c
index 9dd2d216cfc1..6b5f76295d3d 100644
--- a/net/netfilter/nf_tables_inet.c
+++ b/net/netfilter/nf_tables_inet.c
@@ -57,7 +57,7 @@ err:
static void __net_exit nf_tables_inet_exit_net(struct net *net)
{
- nft_unregister_afinfo(net->nft.inet);
+ nft_unregister_afinfo(net, net->nft.inet);
kfree(net->nft.inet);
}
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 7b9c053ba750..5eefe4a355c6 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -94,7 +94,7 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb,
{
struct nft_pktinfo pkt;
- switch (eth_hdr(skb)->h_proto) {
+ switch (skb->protocol) {
case htons(ETH_P_IP):
nft_netdev_set_pktinfo_ipv4(&pkt, skb, state);
break;
@@ -139,7 +139,7 @@ err:
static void nf_tables_netdev_exit_net(struct net *net)
{
- nft_unregister_afinfo(net->nft.netdev);
+ nft_unregister_afinfo(net, net->nft.netdev);
kfree(net->nft.netdev);
}
@@ -156,35 +156,17 @@ static const struct nf_chain_type nft_filter_chain_netdev = {
.hook_mask = (1 << NF_NETDEV_INGRESS),
};
-static void nft_netdev_event(unsigned long event, struct nft_af_info *afi,
- struct net_device *dev, struct nft_table *table,
- struct nft_base_chain *basechain)
+static void nft_netdev_event(unsigned long event, struct net_device *dev,
+ struct nft_ctx *ctx)
{
- switch (event) {
- case NETDEV_REGISTER:
- if (strcmp(basechain->dev_name, dev->name) != 0)
- return;
-
- BUG_ON(!(basechain->flags & NFT_BASECHAIN_DISABLED));
+ struct nft_base_chain *basechain = nft_base_chain(ctx->chain);
- dev_hold(dev);
- basechain->ops[0].dev = dev;
- basechain->flags &= ~NFT_BASECHAIN_DISABLED;
- if (!(table->flags & NFT_TABLE_F_DORMANT))
- nft_register_basechain(basechain, afi->nops);
- break;
+ switch (event) {
case NETDEV_UNREGISTER:
if (strcmp(basechain->dev_name, dev->name) != 0)
return;
- BUG_ON(basechain->flags & NFT_BASECHAIN_DISABLED);
-
- if (!(table->flags & NFT_TABLE_F_DORMANT))
- nft_unregister_basechain(basechain, afi->nops);
-
- dev_put(basechain->ops[0].dev);
- basechain->ops[0].dev = NULL;
- basechain->flags |= NFT_BASECHAIN_DISABLED;
+ __nft_release_basechain(ctx);
break;
case NETDEV_CHANGENAME:
if (dev->ifindex != basechain->ops[0].dev->ifindex)
@@ -201,20 +183,29 @@ static int nf_tables_netdev_event(struct notifier_block *this,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct nft_af_info *afi;
struct nft_table *table;
- struct nft_chain *chain;
+ struct nft_chain *chain, *nr;
+ struct nft_ctx ctx = {
+ .net = dev_net(dev),
+ };
+
+ if (event != NETDEV_UNREGISTER &&
+ event != NETDEV_CHANGENAME)
+ return NOTIFY_DONE;
nfnl_lock(NFNL_SUBSYS_NFTABLES);
list_for_each_entry(afi, &dev_net(dev)->nft.af_info, list) {
+ ctx.afi = afi;
if (afi->family != NFPROTO_NETDEV)
continue;
list_for_each_entry(table, &afi->tables, list) {
- list_for_each_entry(chain, &table->chains, list) {
+ ctx.table = table;
+ list_for_each_entry_safe(chain, nr, &table->chains, list) {
if (!(chain->flags & NFT_BASE_CHAIN))
continue;
- nft_netdev_event(event, afi, dev, table,
- nft_base_chain(chain));
+ ctx.chain = chain;
+ nft_netdev_event(event, dev, &ctx);
}
}
}
@@ -233,12 +224,12 @@ static int __init nf_tables_netdev_init(void)
nft_register_chain_type(&nft_filter_chain_netdev);
ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
- if (ret < 0)
+ if (ret < 0) {
nft_unregister_chain_type(&nft_filter_chain_netdev);
-
+ return ret;
+ }
register_netdevice_notifier(&nf_tables_netdev_notifier);
-
- return ret;
+ return 0;
}
static void __exit nf_tables_netdev_exit(void)
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
new file mode 100644
index 000000000000..e9e959f65d91
--- /dev/null
+++ b/net/netfilter/nf_tables_trace.c
@@ -0,0 +1,275 @@
+/*
+ * (C) 2015 Red Hat GmbH
+ * Author: Florian Westphal <fw@strlen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/static_key.h>
+#include <linux/hash.h>
+#include <linux/jhash.h>
+#include <linux/if_vlan.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#define NFT_TRACETYPE_LL_HSIZE 20
+#define NFT_TRACETYPE_NETWORK_HSIZE 40
+#define NFT_TRACETYPE_TRANSPORT_HSIZE 20
+
+DEFINE_STATIC_KEY_FALSE(nft_trace_enabled);
+EXPORT_SYMBOL_GPL(nft_trace_enabled);
+
+static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb)
+{
+ __be32 id;
+
+ /* using skb address as ID results in a limited number of
+ * values (and quick reuse).
+ *
+ * So we attempt to use as many skb members as possible that
+ * will not change while the skb is within netfilter.
+ */
+ id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb),
+ skb->skb_iif);
+
+ return nla_put_be32(nlskb, NFTA_TRACE_ID, id);
+}
+
+static int trace_fill_header(struct sk_buff *nlskb, u16 type,
+ const struct sk_buff *skb,
+ int off, unsigned int len)
+{
+ struct nlattr *nla;
+
+ if (len == 0)
+ return 0;
+
+ nla = nla_reserve(nlskb, type, len);
+ if (!nla || skb_copy_bits(skb, off, nla_data(nla), len))
+ return -1;
+
+ return 0;
+}
+
+static int nf_trace_fill_ll_header(struct sk_buff *nlskb,
+ const struct sk_buff *skb)
+{
+ struct vlan_ethhdr veth;
+ int off;
+
+ BUILD_BUG_ON(sizeof(veth) > NFT_TRACETYPE_LL_HSIZE);
+
+ off = skb_mac_header(skb) - skb->data;
+ if (off != -ETH_HLEN)
+ return -1;
+
+ if (skb_copy_bits(skb, off, &veth, ETH_HLEN))
+ return -1;
+
+ veth.h_vlan_proto = skb->vlan_proto;
+ veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
+ veth.h_vlan_encapsulated_proto = skb->protocol;
+
+ return nla_put(nlskb, NFTA_TRACE_LL_HEADER, sizeof(veth), &veth);
+}
+
+static int nf_trace_fill_dev_info(struct sk_buff *nlskb,
+ const struct net_device *indev,
+ const struct net_device *outdev)
+{
+ if (indev) {
+ if (nla_put_be32(nlskb, NFTA_TRACE_IIF,
+ htonl(indev->ifindex)))
+ return -1;
+
+ if (nla_put_be16(nlskb, NFTA_TRACE_IIFTYPE,
+ htons(indev->type)))
+ return -1;
+ }
+
+ if (outdev) {
+ if (nla_put_be32(nlskb, NFTA_TRACE_OIF,
+ htonl(outdev->ifindex)))
+ return -1;
+
+ if (nla_put_be16(nlskb, NFTA_TRACE_OIFTYPE,
+ htons(outdev->type)))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
+ const struct nft_pktinfo *pkt)
+{
+ const struct sk_buff *skb = pkt->skb;
+ unsigned int len = min_t(unsigned int,
+ pkt->xt.thoff - skb_network_offset(skb),
+ NFT_TRACETYPE_NETWORK_HSIZE);
+ int off = skb_network_offset(skb);
+
+ if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len))
+ return -1;
+
+ len = min_t(unsigned int, skb->len - pkt->xt.thoff,
+ NFT_TRACETYPE_TRANSPORT_HSIZE);
+
+ if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
+ pkt->xt.thoff, len))
+ return -1;
+
+ if (!skb_mac_header_was_set(skb))
+ return 0;
+
+ if (skb_vlan_tag_get(skb))
+ return nf_trace_fill_ll_header(nlskb, skb);
+
+ off = skb_mac_header(skb) - skb->data;
+ len = min_t(unsigned int, -off, NFT_TRACETYPE_LL_HSIZE);
+ return trace_fill_header(nlskb, NFTA_TRACE_LL_HEADER,
+ skb, off, len);
+}
+
+static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
+ const struct nft_traceinfo *info)
+{
+ if (!info->rule)
+ return 0;
+
+ /* a continue verdict with ->type == RETURN means that this is
+ * an implicit return (end of chain reached).
+ *
+ * Since no rule matched, the ->rule pointer is invalid.
+ */
+ if (info->type == NFT_TRACETYPE_RETURN &&
+ info->verdict->code == NFT_CONTINUE)
+ return 0;
+
+ return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE,
+ cpu_to_be64(info->rule->handle));
+}
+
+void nft_trace_notify(struct nft_traceinfo *info)
+{
+ const struct nft_pktinfo *pkt = info->pkt;
+ struct nfgenmsg *nfmsg;
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ unsigned int size;
+ int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE;
+
+ if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTRACE))
+ return;
+
+ size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
+ nla_total_size(NFT_TABLE_MAXNAMELEN) +
+ nla_total_size(NFT_CHAIN_MAXNAMELEN) +
+ nla_total_size(sizeof(__be64)) + /* rule handle */
+ nla_total_size(sizeof(__be32)) + /* trace type */
+ nla_total_size(0) + /* VERDICT, nested */
+ nla_total_size(sizeof(u32)) + /* verdict code */
+ nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */
+ nla_total_size(sizeof(u32)) + /* id */
+ nla_total_size(NFT_TRACETYPE_LL_HSIZE) +
+ nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) +
+ nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) +
+ nla_total_size(sizeof(u32)) + /* iif */
+ nla_total_size(sizeof(__be16)) + /* iiftype */
+ nla_total_size(sizeof(u32)) + /* oif */
+ nla_total_size(sizeof(__be16)) + /* oiftype */
+ nla_total_size(sizeof(u32)) + /* mark */
+ nla_total_size(sizeof(u32)) + /* nfproto */
+ nla_total_size(sizeof(u32)); /* policy */
+
+ skb = nlmsg_new(size, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0);
+ if (!nlh)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = info->basechain->type->family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(pkt->pf)))
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type)))
+ goto nla_put_failure;
+
+ if (trace_fill_id(skb, pkt->skb))
+ goto nla_put_failure;
+
+ if (info->chain) {
+ if (nla_put_string(skb, NFTA_TRACE_CHAIN,
+ info->chain->name))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_TRACE_TABLE,
+ info->chain->table->name))
+ goto nla_put_failure;
+ }
+
+ if (nf_trace_fill_rule_info(skb, info))
+ goto nla_put_failure;
+
+ switch (info->type) {
+ case NFT_TRACETYPE_UNSPEC:
+ case __NFT_TRACETYPE_MAX:
+ break;
+ case NFT_TRACETYPE_RETURN:
+ case NFT_TRACETYPE_RULE:
+ if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict))
+ goto nla_put_failure;
+ break;
+ case NFT_TRACETYPE_POLICY:
+ if (nla_put_be32(skb, NFTA_TRACE_POLICY,
+ info->basechain->policy))
+ goto nla_put_failure;
+ break;
+ }
+
+ if (pkt->skb->mark &&
+ nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark)))
+ goto nla_put_failure;
+
+ if (!info->packet_dumped) {
+ if (nf_trace_fill_dev_info(skb, pkt->in, pkt->out))
+ goto nla_put_failure;
+
+ if (nf_trace_fill_pkt_info(skb, pkt))
+ goto nla_put_failure;
+ info->packet_dumped = true;
+ }
+
+ nlmsg_end(skb, nlh);
+ nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC);
+ return;
+
+ nla_put_failure:
+ WARN_ON_ONCE(1);
+ kfree_skb(skb);
+}
+
+void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
+ const struct nft_verdict *verdict,
+ const struct nft_chain *chain)
+{
+ info->basechain = nft_base_chain(chain);
+ info->trace = true;
+ info->packet_dumped = false;
+ info->pkt = pkt;
+ info->verdict = verdict;
+}
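nft_trace_notify() sizes the notification up front from nla_total_size() terms and only attaches the expensive device and packet attributes once per packet (packet_dumped). A minimal sketch of that size-then-fill netlink pattern with a single attribute; the helper name and the choice of NFTA_TRACE_ID as the example attribute are illustrative:

#include <linux/skbuff.h>
#include <net/netlink.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>

static struct sk_buff *demo_build_trace_msg(u32 value)
{
        /* Worst-case size: nfgenmsg header plus one 32-bit attribute. */
        unsigned int size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
                            nla_total_size(sizeof(u32));
        struct sk_buff *skb = nlmsg_new(size, GFP_ATOMIC);
        struct nlmsghdr *nlh;

        if (!skb)
                return NULL;

        nlh = nlmsg_put(skb, 0, 0,
                        (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE,
                        sizeof(struct nfgenmsg), 0);
        if (!nlh)
                goto err;

        if (nla_put_be32(skb, NFTA_TRACE_ID, htonl(value)))
                goto err;

        nlmsg_end(skb, nlh);
        return skb;
err:
        kfree_skb(skb);
        return NULL;
}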
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index f1d9e887f5b1..2278d9ab723b 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -33,6 +33,10 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
+#define nfnl_dereference_protected(id) \
+ rcu_dereference_protected(table[(id)].subsys, \
+ lockdep_nfnl_is_held((id)))
+
static char __initdata nfversion[] = "0.30";
static struct {
@@ -49,6 +53,7 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = {
[NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP,
[NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES,
[NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT,
+ [NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES,
};
void nfnl_lock(__u8 subsys_id)
@@ -122,13 +127,6 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group)
}
EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
-struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
- u32 dst_portid, gfp_t gfp_mask)
-{
- return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask);
-}
-EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb);
-
int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
unsigned int group, int echo, gfp_t flags)
{
@@ -201,19 +199,18 @@ replay:
}
if (nc->call_rcu) {
- err = nc->call_rcu(net->nfnl, skb, nlh,
+ err = nc->call_rcu(net, net->nfnl, skb, nlh,
(const struct nlattr **)cda);
rcu_read_unlock();
} else {
rcu_read_unlock();
nfnl_lock(subsys_id);
- if (rcu_dereference_protected(table[subsys_id].subsys,
- lockdep_is_held(&table[subsys_id].mutex)) != ss ||
+ if (nfnl_dereference_protected(subsys_id) != ss ||
nfnetlink_find_client(type, ss) != nc)
err = -EAGAIN;
else if (nc->call)
- err = nc->call(net->nfnl, skb, nlh,
- (const struct nlattr **)cda);
+ err = nc->call(net, net->nfnl, skb, nlh,
+ (const struct nlattr **)cda);
else
err = -EINVAL;
nfnl_unlock(subsys_id);
@@ -295,30 +292,26 @@ replay:
if (!skb)
return netlink_ack(oskb, nlh, -ENOMEM);
- skb->sk = oskb->sk;
-
nfnl_lock(subsys_id);
- ss = rcu_dereference_protected(table[subsys_id].subsys,
- lockdep_is_held(&table[subsys_id].mutex));
+ ss = nfnl_dereference_protected(subsys_id);
if (!ss) {
#ifdef CONFIG_MODULES
nfnl_unlock(subsys_id);
request_module("nfnetlink-subsys-%d", subsys_id);
nfnl_lock(subsys_id);
- ss = rcu_dereference_protected(table[subsys_id].subsys,
- lockdep_is_held(&table[subsys_id].mutex));
+ ss = nfnl_dereference_protected(subsys_id);
if (!ss)
#endif
{
nfnl_unlock(subsys_id);
- netlink_ack(skb, nlh, -EOPNOTSUPP);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP);
return kfree_skb(skb);
}
}
if (!ss->commit || !ss->abort) {
nfnl_unlock(subsys_id);
- netlink_ack(skb, nlh, -EOPNOTSUPP);
+ netlink_ack(oskb, nlh, -EOPNOTSUPP);
return kfree_skb(skb);
}
@@ -328,10 +321,12 @@ replay:
nlh = nlmsg_hdr(skb);
err = 0;
- if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) ||
- skb->len < nlh->nlmsg_len) {
- err = -EINVAL;
- goto ack;
+ if (nlh->nlmsg_len < NLMSG_HDRLEN ||
+ skb->len < nlh->nlmsg_len ||
+ nlmsg_len(nlh) < sizeof(struct nfgenmsg)) {
+ nfnl_err_reset(&err_list);
+ status |= NFNL_BATCH_FAILURE;
+ goto done;
}
/* Only requests are handled by the kernel */
@@ -381,7 +376,7 @@ replay:
goto ack;
if (nc->call_batch) {
- err = nc->call_batch(net->nfnl, skb, nlh,
+ err = nc->call_batch(net, net->nfnl, skb, nlh,
(const struct nlattr **)cda);
}
@@ -406,7 +401,7 @@ ack:
* pointing to the batch header.
*/
nfnl_err_reset(&err_list);
- netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM);
+ netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM);
status |= NFNL_BATCH_FAILURE;
goto done;
}
@@ -425,15 +420,15 @@ next:
}
done:
if (status & NFNL_BATCH_REPLAY) {
- ss->abort(oskb);
+ ss->abort(net, oskb);
nfnl_err_reset(&err_list);
nfnl_unlock(subsys_id);
kfree_skb(skb);
goto replay;
} else if (status == NFNL_BATCH_DONE) {
- ss->commit(oskb);
+ ss->commit(net, oskb);
} else {
- ss->abort(oskb);
+ ss->abort(net, oskb);
}
nfnl_err_deliver(&err_list, oskb);
@@ -492,7 +487,7 @@ static int nfnetlink_bind(struct net *net, int group)
type = nfnl_group2type[group];
rcu_read_lock();
- ss = nfnetlink_get_subsys(type);
+ ss = nfnetlink_get_subsys(type << 8);
rcu_read_unlock();
if (!ss)
request_module("nfnetlink-subsys-%d", type);
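The nfnl_dereference_protected() macro introduced above folds the repeated rcu_dereference_protected()-plus-lockdep expression into one place. A hedged sketch of the general pattern with illustrative names (demo_table, demo_ops), not the nfnetlink structures themselves:

#include <linux/mutex.h>
#include <linux/rcupdate.h>

struct demo_ops;

static struct {
        struct demo_ops __rcu   *ops;
        struct mutex            mutex;
} demo_table[4];

#define demo_dereference_protected(id)                                  \
        rcu_dereference_protected(demo_table[(id)].ops,                 \
                                  lockdep_is_held(&demo_table[(id)].mutex))

static struct demo_ops *demo_lookup_locked(int id)
{
        /* Caller must hold demo_table[id].mutex; lockdep checks it for us. */
        return demo_dereference_protected(id);
}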
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index fefbf5f0b28d..4c2b4c0c4d5f 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -46,12 +46,11 @@ struct nfacct_filter {
#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
#define NFACCT_OVERQUOTA_BIT 2 /* NFACCT_F_OVERQUOTA */
-static int
-nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+static int nfnl_acct_new(struct net *net, struct sock *nfnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const tb[])
{
struct nf_acct *nfacct, *matching = NULL;
- struct net *net = sock_net(nfnl);
char *acct_name;
unsigned int size = 0;
u32 flags = 0;
@@ -243,6 +242,9 @@ nfacct_filter_alloc(const struct nlattr * const attr)
if (err < 0)
return ERR_PTR(err);
+ if (!tb[NFACCT_FILTER_MASK] || !tb[NFACCT_FILTER_VALUE])
+ return ERR_PTR(-EINVAL);
+
filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL);
if (!filter)
return ERR_PTR(-ENOMEM);
@@ -253,11 +255,10 @@ nfacct_filter_alloc(const struct nlattr * const attr)
return filter;
}
-static int
-nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+static int nfnl_acct_get(struct net *net, struct sock *nfnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const tb[])
{
- struct net *net = sock_net(nfnl);
int ret = -ENOENT;
struct nf_acct *cur;
char *acct_name;
@@ -333,11 +334,10 @@ static int nfnl_acct_try_del(struct nf_acct *cur)
return ret;
}
-static int
-nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+static int nfnl_acct_del(struct net *net, struct sock *nfnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const tb[])
{
- struct net *net = sock_net(nfnl);
char *acct_name;
struct nf_acct *cur;
int ret = -ENOENT;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 54330fb5efaf..e924e95fcc7f 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -286,9 +286,9 @@ nfnl_cthelper_update(const struct nlattr * const tb[],
return 0;
}
-static int
-nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const tb[])
{
const char *helper_name;
struct nf_conntrack_helper *cur, *helper = NULL;
@@ -498,9 +498,9 @@ out:
return skb->len;
}
-static int
-nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const tb[])
{
int ret = -ENOENT, i;
struct nf_conntrack_helper *cur;
@@ -570,9 +570,9 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb,
return ret;
}
-static int
-nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const tb[])
{
char *helper_name = NULL;
struct nf_conntrack_helper *cur;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index c7a2d0e1c462..2671b9deb103 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -38,8 +38,6 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning");
-static LIST_HEAD(cttimeout_list);
-
static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
[CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING,
.len = CTNL_TIMEOUT_NAME_MAX - 1},
@@ -67,16 +65,15 @@ ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto,
return ret;
}
-static int
-cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
__u16 l3num;
__u8 l4num;
struct nf_conntrack_l4proto *l4proto;
struct ctnl_timeout *timeout, *matching = NULL;
- struct net *net = sock_net(skb->sk);
char *name;
int ret;
@@ -90,7 +87,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb,
l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
- list_for_each_entry(timeout, &cttimeout_list, head) {
+ list_for_each_entry(timeout, &net->nfct_timeout_list, head) {
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
@@ -145,7 +142,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb,
timeout->l3num = l3num;
timeout->l4proto = l4proto;
atomic_set(&timeout->refcnt, 1);
- list_add_tail_rcu(&timeout->head, &cttimeout_list);
+ list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list);
return 0;
err:
@@ -209,6 +206,7 @@ nla_put_failure:
static int
ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = sock_net(skb->sk);
struct ctnl_timeout *cur, *last;
if (cb->args[2])
@@ -219,7 +217,7 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[1] = 0;
rcu_read_lock();
- list_for_each_entry_rcu(cur, &cttimeout_list, head) {
+ list_for_each_entry_rcu(cur, &net->nfct_timeout_list, head) {
if (last) {
if (cur != last)
continue;
@@ -240,10 +238,10 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-static int
-cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int cttimeout_get_timeout(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
int ret = -ENOENT;
char *name;
@@ -260,7 +258,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb,
return -EINVAL;
name = nla_data(cda[CTA_TIMEOUT_NAME]);
- list_for_each_entry(cur, &cttimeout_list, head) {
+ list_for_each_entry(cur, &net->nfct_timeout_list, head) {
struct sk_buff *skb2;
if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
@@ -301,17 +299,17 @@ static void untimeout(struct nf_conntrack_tuple_hash *i,
RCU_INIT_POINTER(timeout_ext->timeout, NULL);
}
-static void ctnl_untimeout(struct ctnl_timeout *timeout)
+static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
{
struct nf_conntrack_tuple_hash *h;
const struct hlist_nulls_node *nn;
int i;
local_bh_disable();
- for (i = 0; i < init_net.ct.htable_size; i++) {
- spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
- if (i < init_net.ct.htable_size) {
- hlist_nulls_for_each_entry(h, nn, &init_net.ct.hash[i], hnnode)
+ for (i = 0; i < net->ct.htable_size; i++) {
+ nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+ if (i < net->ct.htable_size) {
+ hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
untimeout(h, timeout);
}
spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
@@ -320,7 +318,7 @@ static void ctnl_untimeout(struct ctnl_timeout *timeout)
}
/* try to delete object, fail if it is still in use. */
-static int ctnl_timeout_try_del(struct ctnl_timeout *timeout)
+static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
{
int ret = 0;
@@ -329,7 +327,7 @@ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout)
/* We are protected by nfnl mutex. */
list_del_rcu(&timeout->head);
nf_ct_l4proto_put(timeout->l4proto);
- ctnl_untimeout(timeout);
+ ctnl_untimeout(net, timeout);
kfree_rcu(timeout, rcu_head);
} else {
/* still in use, restore reference counter. */
@@ -339,28 +337,28 @@ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout)
return ret;
}
-static int
-cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int cttimeout_del_timeout(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
- char *name;
struct ctnl_timeout *cur;
int ret = -ENOENT;
+ char *name;
if (!cda[CTA_TIMEOUT_NAME]) {
- list_for_each_entry(cur, &cttimeout_list, head)
- ctnl_timeout_try_del(cur);
+ list_for_each_entry(cur, &net->nfct_timeout_list, head)
+ ctnl_timeout_try_del(net, cur);
return 0;
}
name = nla_data(cda[CTA_TIMEOUT_NAME]);
- list_for_each_entry(cur, &cttimeout_list, head) {
+ list_for_each_entry(cur, &net->nfct_timeout_list, head) {
if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
- ret = ctnl_timeout_try_del(cur);
+ ret = ctnl_timeout_try_del(net, cur);
if (ret < 0)
return ret;
@@ -369,15 +367,14 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb,
return ret;
}
-static int
-cttimeout_default_set(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const cda[])
+static int cttimeout_default_set(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
{
__u16 l3num;
__u8 l4num;
struct nf_conntrack_l4proto *l4proto;
- struct net *net = sock_net(skb->sk);
unsigned int *timeouts;
int ret;
@@ -459,14 +456,14 @@ nla_put_failure:
return -1;
}
-static int cttimeout_default_get(struct sock *ctnl, struct sk_buff *skb,
+static int cttimeout_default_get(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
__u16 l3num;
__u8 l4num;
struct nf_conntrack_l4proto *l4proto;
- struct net *net = sock_net(skb->sk);
struct sk_buff *skb2;
int ret, err;
@@ -511,12 +508,13 @@ err:
}
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-static struct ctnl_timeout *ctnl_timeout_find_get(const char *name)
+static struct ctnl_timeout *
+ctnl_timeout_find_get(struct net *net, const char *name)
{
struct ctnl_timeout *timeout, *matching = NULL;
rcu_read_lock();
- list_for_each_entry_rcu(timeout, &cttimeout_list, head) {
+ list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) {
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
@@ -569,10 +567,39 @@ static const struct nfnetlink_subsystem cttimeout_subsys = {
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT);
+static int __net_init cttimeout_net_init(struct net *net)
+{
+ INIT_LIST_HEAD(&net->nfct_timeout_list);
+
+ return 0;
+}
+
+static void __net_exit cttimeout_net_exit(struct net *net)
+{
+ struct ctnl_timeout *cur, *tmp;
+
+ ctnl_untimeout(net, NULL);
+
+ list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
+ list_del_rcu(&cur->head);
+ nf_ct_l4proto_put(cur->l4proto);
+ kfree_rcu(cur, rcu_head);
+ }
+}
+
+static struct pernet_operations cttimeout_ops = {
+ .init = cttimeout_net_init,
+ .exit = cttimeout_net_exit,
+};
+
static int __init cttimeout_init(void)
{
int ret;
+ ret = register_pernet_subsys(&cttimeout_ops);
+ if (ret < 0)
+ return ret;
+
ret = nfnetlink_subsys_register(&cttimeout_subsys);
if (ret < 0) {
pr_err("cttimeout_init: cannot register cttimeout with "
@@ -586,28 +613,17 @@ static int __init cttimeout_init(void)
return 0;
err_out:
+ unregister_pernet_subsys(&cttimeout_ops);
return ret;
}
static void __exit cttimeout_exit(void)
{
- struct ctnl_timeout *cur, *tmp;
-
pr_info("cttimeout: unregistering from nfnetlink.\n");
nfnetlink_subsys_unregister(&cttimeout_subsys);
- /* Make sure no conntrack objects refer to custom timeouts anymore. */
- ctnl_untimeout(NULL);
-
- list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) {
- list_del_rcu(&cur->head);
- /* We are sure that our objects have no clients at this point,
- * it's safe to release them all without checking refcnt.
- */
- nf_ct_l4proto_put(cur->l4proto);
- kfree_rcu(cur, rcu_head);
- }
+ unregister_pernet_subsys(&cttimeout_ops);
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
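
The per-namespace conversion above relies on a nfct_timeout_list member in struct net; that header change is not shown in this diff. A rough sketch of the assumed companion addition (the exact config guard is a guess):

/* include/net/net_namespace.h, assumed companion change (not in this diff) */
struct net {
	/* ... */
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
	struct list_head	nfct_timeout_list;
#endif
	/* ... */
};

Note that cttimeout_net_exit() calls ctnl_untimeout(net, NULL) before releasing the objects, so conntrack entries in the dying namespace drop their timeout extension pointers first.
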
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 06eb48fceb42..11f81c8385fc 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -293,24 +293,20 @@ nfulnl_set_nlbufsiz(struct nfulnl_instance *inst, u_int32_t nlbufsiz)
return status;
}
-static int
+static void
nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout)
{
spin_lock_bh(&inst->lock);
inst->flushtimeout = timeout;
spin_unlock_bh(&inst->lock);
-
- return 0;
}
-static int
+static void
nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh)
{
spin_lock_bh(&inst->lock);
inst->qthreshold = qthresh;
spin_unlock_bh(&inst->lock);
-
- return 0;
}
static int
@@ -334,14 +330,13 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size,
* message. WARNING: has to be <= 128k due to slab restrictions */
n = max(inst_size, pkt_size);
- skb = nfnetlink_alloc_skb(net, n, peer_portid, GFP_ATOMIC);
+ skb = alloc_skb(n, GFP_ATOMIC);
if (!skb) {
if (n > pkt_size) {
/* try to allocate only as much as we need for current
* packet */
- skb = nfnetlink_alloc_skb(net, pkt_size,
- peer_portid, GFP_ATOMIC);
+ skb = alloc_skb(pkt_size, GFP_ATOMIC);
}
}
@@ -789,10 +784,9 @@ static struct notifier_block nfulnl_rtnl_notifier = {
.notifier_call = nfulnl_rcv_nl_event,
};
-static int
-nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[])
+static int nfulnl_recv_unsupp(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nfqa[])
{
return -ENOTSUPP;
}
@@ -813,19 +807,17 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = {
[NFULA_CFG_FLAGS] = { .type = NLA_U16 },
};
-static int
-nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nfula[])
+static int nfulnl_recv_config(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nfula[])
{
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int16_t group_num = ntohs(nfmsg->res_id);
struct nfulnl_instance *inst;
struct nfulnl_msg_config_cmd *cmd = NULL;
- struct net *net = sock_net(ctnl);
struct nfnl_log_net *log = nfnl_log_pernet(net);
int ret = 0;
- u16 flags;
+ u16 flags = 0;
if (nfula[NFULA_CFG_CMD]) {
u_int8_t pf = nfmsg->nfgen_family;
@@ -895,7 +887,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
goto out_put;
default:
ret = -ENOTSUPP;
- break;
+ goto out_put;
}
} else if (!inst) {
ret = -ENODEV;
@@ -1064,15 +1056,26 @@ static int __net_init nfnl_log_net_init(struct net *net)
{
unsigned int i;
struct nfnl_log_net *log = nfnl_log_pernet(net);
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *proc;
+ kuid_t root_uid;
+ kgid_t root_gid;
+#endif
for (i = 0; i < INSTANCE_BUCKETS; i++)
INIT_HLIST_HEAD(&log->instance_table[i]);
spin_lock_init(&log->instances_lock);
#ifdef CONFIG_PROC_FS
- if (!proc_create("nfnetlink_log", 0440,
- net->nf.proc_netfilter, &nful_file_ops))
+ proc = proc_create("nfnetlink_log", 0440,
+ net->nf.proc_netfilter, &nful_file_ops);
+ if (!proc)
return -ENOMEM;
+
+ root_uid = make_kuid(net->user_ns, 0);
+ root_gid = make_kgid(net->user_ns, 0);
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
#endif
return 0;
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 7d81d280cb4f..cb5b630a645b 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -301,7 +301,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
__be32 **packet_id_ptr)
{
size_t size;
- size_t data_len = 0, cap_len = 0, rem_len = 0;
+ size_t data_len = 0, cap_len = 0;
unsigned int hlen = 0;
struct sk_buff *skb;
struct nlattr *nla;
@@ -361,12 +361,12 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
hlen = min_t(unsigned int, hlen, data_len);
size += sizeof(struct nlattr) + hlen;
cap_len = entskb->len;
- rem_len = data_len - hlen;
break;
}
+ nfnl_ct = rcu_dereference(nfnl_ct_hook);
+
if (queue->flags & NFQA_CFG_F_CONNTRACK) {
- nfnl_ct = rcu_dereference(nfnl_ct_hook);
if (nfnl_ct != NULL) {
ct = nfnl_ct->get_ct(entskb, &ctinfo);
if (ct != NULL)
@@ -385,8 +385,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
size += nla_total_size(seclen);
}
- skb = __netlink_alloc_skb(net->nfnl, size, rem_len, queue->peer_portid,
- GFP_ATOMIC);
+ skb = alloc_skb(size, GFP_ATOMIC);
if (!skb) {
skb_tx_error(entskb);
return NULL;
@@ -583,7 +582,12 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
/* nfnetlink_unicast will either free the nskb or add it to a socket */
err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT);
if (err < 0) {
- queue->queue_user_dropped++;
+ if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
+ failopen = 1;
+ err = 0;
+ } else {
+ queue->queue_user_dropped++;
+ }
goto err_out_unlock;
}
@@ -956,10 +960,10 @@ static int nfq_id_after(unsigned int id, unsigned int max)
return (int)(id - max) > 0;
}
-static int
-nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[])
+static int nfqnl_recv_verdict_batch(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nfqa[])
{
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nf_queue_entry *entry, *tmp;
@@ -968,8 +972,6 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
struct nfqnl_instance *queue;
LIST_HEAD(batch_list);
u16 queue_num = ntohs(nfmsg->res_id);
-
- struct net *net = sock_net(ctnl);
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
queue = verdict_instance_lookup(q, queue_num,
@@ -1028,14 +1030,13 @@ static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct,
return ct;
}
-static int
-nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[])
+static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nfqa[])
{
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int16_t queue_num = ntohs(nfmsg->res_id);
-
struct nfqnl_msg_verdict_hdr *vhdr;
struct nfqnl_instance *queue;
unsigned int verdict;
@@ -1043,8 +1044,6 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
enum ip_conntrack_info uninitialized_var(ctinfo);
struct nfnl_ct_hook *nfnl_ct;
struct nf_conn *ct = NULL;
-
- struct net *net = sock_net(ctnl);
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
queue = instance_lookup(q, queue_num);
@@ -1064,9 +1063,10 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
if (entry == NULL)
return -ENOENT;
+ /* rcu lock already held from nfnl->call_rcu. */
+ nfnl_ct = rcu_dereference(nfnl_ct_hook);
+
if (nfqa[NFQA_CT]) {
- /* rcu lock already held from nfnl->call_rcu. */
- nfnl_ct = rcu_dereference(nfnl_ct_hook);
if (nfnl_ct != NULL)
ct = nfqnl_ct_parse(nfnl_ct, nlh, nfqa, entry, &ctinfo);
}
@@ -1090,10 +1090,9 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
return 0;
}
-static int
-nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[])
+static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nfqa[])
{
return -ENOTSUPP;
}
@@ -1108,17 +1107,16 @@ static const struct nf_queue_handler nfqh = {
.nf_hook_drop = &nfqnl_nf_hook_drop,
};
-static int
-nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nfqa[])
+static int nfqnl_recv_config(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const nfqa[])
{
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int16_t queue_num = ntohs(nfmsg->res_id);
struct nfqnl_instance *queue;
struct nfqnl_msg_config_cmd *cmd = NULL;
- struct net *net = sock_net(ctnl);
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+ __u32 flags = 0, mask = 0;
int ret = 0;
if (nfqa[NFQA_CFG_CMD]) {
@@ -1131,6 +1129,40 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
}
}
+	/* Check if we support these flags in the first place; dependencies
+	 * must be present too, so that atomicity is not broken.
+	 */
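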
+ if (nfqa[NFQA_CFG_FLAGS]) {
+ if (!nfqa[NFQA_CFG_MASK]) {
+ /* A mask is needed to specify which flags are being
+ * changed.
+ */
+ return -EINVAL;
+ }
+
+ flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS]));
+ mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK]));
+
+ if (flags >= NFQA_CFG_F_MAX)
+ return -EOPNOTSUPP;
+
+#if !IS_ENABLED(CONFIG_NETWORK_SECMARK)
+ if (flags & mask & NFQA_CFG_F_SECCTX)
+ return -EOPNOTSUPP;
+#endif
+ if ((flags & mask & NFQA_CFG_F_CONNTRACK) &&
+ !rcu_access_pointer(nfnl_ct_hook)) {
+#ifdef CONFIG_MODULES
+ nfnl_unlock(NFNL_SUBSYS_QUEUE);
+ request_module("ip_conntrack_netlink");
+ nfnl_lock(NFNL_SUBSYS_QUEUE);
+ if (rcu_access_pointer(nfnl_ct_hook))
+ return -EAGAIN;
+#endif
+ return -EOPNOTSUPP;
+ }
+ }
+
rcu_read_lock();
queue = instance_lookup(q, queue_num);
if (queue && queue->peer_portid != NETLINK_CB(skb).portid) {
@@ -1158,70 +1190,38 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
goto err_out_unlock;
}
instance_destroy(q, queue);
- break;
+ goto err_out_unlock;
case NFQNL_CFG_CMD_PF_BIND:
case NFQNL_CFG_CMD_PF_UNBIND:
break;
default:
ret = -ENOTSUPP;
- break;
+ goto err_out_unlock;
}
}
+ if (!queue) {
+ ret = -ENODEV;
+ goto err_out_unlock;
+ }
+
if (nfqa[NFQA_CFG_PARAMS]) {
- struct nfqnl_msg_config_params *params;
+ struct nfqnl_msg_config_params *params =
+ nla_data(nfqa[NFQA_CFG_PARAMS]);
- if (!queue) {
- ret = -ENODEV;
- goto err_out_unlock;
- }
- params = nla_data(nfqa[NFQA_CFG_PARAMS]);
nfqnl_set_mode(queue, params->copy_mode,
ntohl(params->copy_range));
}
if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) {
- __be32 *queue_maxlen;
+ __be32 *queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
- if (!queue) {
- ret = -ENODEV;
- goto err_out_unlock;
- }
- queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
spin_lock_bh(&queue->lock);
queue->queue_maxlen = ntohl(*queue_maxlen);
spin_unlock_bh(&queue->lock);
}
if (nfqa[NFQA_CFG_FLAGS]) {
- __u32 flags, mask;
-
- if (!queue) {
- ret = -ENODEV;
- goto err_out_unlock;
- }
-
- if (!nfqa[NFQA_CFG_MASK]) {
- /* A mask is needed to specify which flags are being
- * changed.
- */
- ret = -EINVAL;
- goto err_out_unlock;
- }
-
- flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS]));
- mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK]));
-
- if (flags >= NFQA_CFG_F_MAX) {
- ret = -EOPNOTSUPP;
- goto err_out_unlock;
- }
-#if !IS_ENABLED(CONFIG_NETWORK_SECMARK)
- if (flags & mask & NFQA_CFG_F_SECCTX) {
- ret = -EOPNOTSUPP;
- goto err_out_unlock;
- }
-#endif
spin_lock_bh(&queue->lock);
queue->flags &= ~mask;
queue->flags |= flags & mask;
@@ -1417,6 +1417,7 @@ static int __init nfnetlink_queue_init(void)
cleanup_netlink_notifier:
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+ unregister_pernet_subsys(&nfnl_queue_net_ops);
out:
return status;
}
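
nfqnl_recv_config() now validates NFQA_CFG_FLAGS and NFQA_CFG_MASK before any command is executed, so a malformed request no longer leaves the instance half-configured. The mask selects which flag bits may change; the update applied at the end of the function (see the queue->flags lines above) is equivalent to this small sketch:

/* Sketch: only bits set in 'mask' are taken from 'flags', the rest are kept. */
static inline u32 nfq_apply_flags(u32 cur, u32 flags, u32 mask)
{
	return (cur & ~mask) | (flags & mask);
}
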
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index fde5145f2e36..b78c28ba465f 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -8,6 +8,7 @@
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
+#include <asm/unaligned.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -39,6 +40,25 @@ static void nft_byteorder_eval(const struct nft_expr *expr,
d = (void *)dst;
switch (priv->size) {
+ case 8: {
+ u64 src64;
+
+ switch (priv->op) {
+ case NFT_BYTEORDER_NTOH:
+ for (i = 0; i < priv->len / 8; i++) {
+ src64 = get_unaligned((u64 *)&src[i]);
+ put_unaligned_be64(src64, &dst[i]);
+ }
+ break;
+ case NFT_BYTEORDER_HTON:
+ for (i = 0; i < priv->len / 8; i++) {
+ src64 = get_unaligned_be64(&src[i]);
+ put_unaligned(src64, (u64 *)&dst[i]);
+ }
+ break;
+ }
+ break;
+ }
case 4:
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
@@ -101,6 +121,7 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
switch (priv->size) {
case 2:
case 4:
+ case 8:
break;
default:
return -EINVAL;
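
The new 8-byte case converts one 64-bit word at a time through the unaligned helpers, since nft registers are only guaranteed 32-bit alignment. As a plain illustration (not kernel code), converting one big-endian 8-byte word to host order amounts to:

#include <stdint.h>

/* Illustration only: read an 8-byte big-endian (network order) word into a
 * host-order integer, the per-word transformation done in the loops above.
 */
static uint64_t load_be64(const unsigned char b[8])
{
	uint64_t v = 0;
	int i;

	for (i = 0; i < 8; i++)
		v = (v << 8) | b[i];	/* most significant byte first */
	return v;
}
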
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 9c8fab00164b..6228c422c766 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -519,9 +519,9 @@ nla_put_failure:
return -1;
}
-static int
-nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+static int nfnl_compat_get(struct net *net, struct sock *nfnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const tb[])
{
int ret = 0, target;
struct nfgenmsg *nfmsg;
@@ -660,6 +660,9 @@ nft_match_select_ops(const struct nft_ctx *ctx,
if (IS_ERR(match))
return ERR_PTR(-ENOENT);
+ if (match->matchsize > nla_len(tb[NFTA_MATCH_INFO]))
+ return ERR_PTR(-EINVAL);
+
/* This is the first time we use this match, allocate operations */
nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
if (nft_match == NULL)
@@ -740,6 +743,9 @@ nft_target_select_ops(const struct nft_ctx *ctx,
if (IS_ERR(target))
return ERR_PTR(-ENOENT);
+ if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO]))
+ return ERR_PTR(-EINVAL);
+
/* This is the first time we use this target, allocate operations */
nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
if (nft_target == NULL)
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 1067fb4c1ffa..c9743f78f219 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -47,27 +47,34 @@ static void nft_counter_eval(const struct nft_expr *expr,
local_bh_enable();
}
-static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter,
+ struct nft_counter *total)
{
- struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
- struct nft_counter_percpu *cpu_stats;
- struct nft_counter total;
+ const struct nft_counter_percpu *cpu_stats;
u64 bytes, packets;
unsigned int seq;
int cpu;
- memset(&total, 0, sizeof(total));
+ memset(total, 0, sizeof(*total));
for_each_possible_cpu(cpu) {
- cpu_stats = per_cpu_ptr(priv->counter, cpu);
+ cpu_stats = per_cpu_ptr(counter, cpu);
do {
seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
bytes = cpu_stats->counter.bytes;
packets = cpu_stats->counter.packets;
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
- total.packets += packets;
- total.bytes += bytes;
+ total->packets += packets;
+ total->bytes += bytes;
}
+}
+
+static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+ struct nft_counter total;
+
+ nft_counter_fetch(priv->counter, &total);
if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) ||
nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets)))
@@ -93,7 +100,7 @@ static int nft_counter_init(const struct nft_ctx *ctx,
cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu);
if (cpu_stats == NULL)
- return ENOMEM;
+ return -ENOMEM;
preempt_disable();
this_cpu = this_cpu_ptr(cpu_stats);
@@ -118,6 +125,31 @@ static void nft_counter_destroy(const struct nft_ctx *ctx,
free_percpu(priv->counter);
}
+static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
+ struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
+ struct nft_counter_percpu __percpu *cpu_stats;
+ struct nft_counter_percpu *this_cpu;
+ struct nft_counter total;
+
+ nft_counter_fetch(priv->counter, &total);
+
+ cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
+ GFP_ATOMIC);
+ if (cpu_stats == NULL)
+ return -ENOMEM;
+
+ preempt_disable();
+ this_cpu = this_cpu_ptr(cpu_stats);
+ this_cpu->counter.packets = total.packets;
+ this_cpu->counter.bytes = total.bytes;
+ preempt_enable();
+
+ priv_clone->counter = cpu_stats;
+ return 0;
+}
+
static struct nft_expr_type nft_counter_type;
static const struct nft_expr_ops nft_counter_ops = {
.type = &nft_counter_type,
@@ -126,6 +158,7 @@ static const struct nft_expr_ops nft_counter_ops = {
.init = nft_counter_init,
.destroy = nft_counter_destroy,
.dump = nft_counter_dump,
+ .clone = nft_counter_clone,
};
static struct nft_expr_type nft_counter_type __read_mostly = {
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 8cbca3432f90..d4a4619fcebc 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -16,6 +16,7 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_ecache.h>
@@ -30,6 +31,18 @@ struct nft_ct {
};
};
+static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c,
+ enum nft_ct_keys k,
+ enum ip_conntrack_dir d)
+{
+ if (d < IP_CT_DIR_MAX)
+ return k == NFT_CT_BYTES ? atomic64_read(&c[d].bytes) :
+ atomic64_read(&c[d].packets);
+
+ return nft_ct_get_eval_counter(c, k, IP_CT_DIR_ORIGINAL) +
+ nft_ct_get_eval_counter(c, k, IP_CT_DIR_REPLY);
+}
+
static void nft_ct_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -115,6 +128,17 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
return;
}
#endif
+ case NFT_CT_BYTES: /* fallthrough */
+ case NFT_CT_PKTS: {
+ const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
+ u64 count = 0;
+
+ if (acct)
+ count = nft_ct_get_eval_counter(acct->counter,
+ priv->key, priv->dir);
+ memcpy(dest, &count, sizeof(count));
+ return;
+ }
default:
break;
}
@@ -291,6 +315,13 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
return -EINVAL;
len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all);
break;
+ case NFT_CT_BYTES:
+ case NFT_CT_PKTS:
+ /* no direction? return sum of original + reply */
+ if (tb[NFTA_CT_DIRECTION] == NULL)
+ priv->dir = IP_CT_DIR_MAX;
+ len = sizeof(u64);
+ break;
default:
return -EOPNOTSUPP;
}
@@ -366,6 +397,7 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
goto nla_put_failure;
switch (priv->key) {
+ case NFT_CT_L3PROTOCOL:
case NFT_CT_PROTOCOL:
case NFT_CT_SRC:
case NFT_CT_DST:
@@ -373,6 +405,13 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
case NFT_CT_PROTO_DST:
if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
goto nla_put_failure;
+ break;
+ case NFT_CT_BYTES:
+ case NFT_CT_PKTS:
+ if (priv->dir < IP_CT_DIR_MAX &&
+ nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
+ goto nla_put_failure;
+ break;
default:
break;
}
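
With NFT_CT_BYTES and NFT_CT_PKTS the ct expression can read the conntrack accounting counters (available when nf_conntrack_acct is enabled); omitting NFTA_CT_DIRECTION returns the sum of the original and reply directions. From userspace this is typically written as something like "ct bytes gt 100000" or "ct original packets lt 10", assuming an nftables release that exposes these keys.
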
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
new file mode 100644
index 000000000000..2cc1e0ef56e8
--- /dev/null
+++ b/net/netfilter/nft_dup_netdev.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_dup_netdev.h>
+
+struct nft_dup_netdev {
+ enum nft_registers sreg_dev:8;
+};
+
+static void nft_dup_netdev_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_dup_netdev *priv = nft_expr_priv(expr);
+ int oif = regs->data[priv->sreg_dev];
+
+ nf_dup_netdev_egress(pkt, oif);
+}
+
+static const struct nla_policy nft_dup_netdev_policy[NFTA_DUP_MAX + 1] = {
+ [NFTA_DUP_SREG_DEV] = { .type = NLA_U32 },
+};
+
+static int nft_dup_netdev_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_dup_netdev *priv = nft_expr_priv(expr);
+
+ if (tb[NFTA_DUP_SREG_DEV] == NULL)
+ return -EINVAL;
+
+ priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
+ return nft_validate_register_load(priv->sreg_dev, sizeof(int));
+}
+
+static const struct nft_expr_ops nft_dup_netdev_ingress_ops;
+
+static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_dup_netdev *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_dup_netdev_type;
+static const struct nft_expr_ops nft_dup_netdev_ops = {
+ .type = &nft_dup_netdev_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_dup_netdev)),
+ .eval = nft_dup_netdev_eval,
+ .init = nft_dup_netdev_init,
+ .dump = nft_dup_netdev_dump,
+};
+
+static struct nft_expr_type nft_dup_netdev_type __read_mostly = {
+ .family = NFPROTO_NETDEV,
+ .name = "dup",
+ .ops = &nft_dup_netdev_ops,
+ .policy = nft_dup_netdev_policy,
+ .maxattr = NFTA_DUP_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_dup_netdev_module_init(void)
+{
+ return nft_register_expr(&nft_dup_netdev_type);
+}
+
+static void __exit nft_dup_netdev_module_exit(void)
+{
+ nft_unregister_expr(&nft_dup_netdev_type);
+}
+
+module_init(nft_dup_netdev_module_init);
+module_exit(nft_dup_netdev_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_AF_EXPR(5, "dup");
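
The netdev "dup" expression clones the packet via nf_dup_netdev_egress() and transmits the copy through the interface index loaded from the source register; the original packet continues through the ruleset unchanged. Illustrative userspace usage, assuming a matching nftables release: "nft add rule netdev filter ingress dup to eth1".
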
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 513a8ef60a59..9dec3bd1b63c 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -50,8 +50,9 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
}
ext = nft_set_elem_ext(set, elem);
- if (priv->expr != NULL)
- nft_expr_clone(nft_set_ext_expr(ext), priv->expr);
+ if (priv->expr != NULL &&
+ nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0)
+ return NULL;
return elem;
}
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
new file mode 100644
index 000000000000..763ebc3e0b2b
--- /dev/null
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_dup_netdev.h>
+
+struct nft_fwd_netdev {
+ enum nft_registers sreg_dev:8;
+};
+
+static void nft_fwd_netdev_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_fwd_netdev *priv = nft_expr_priv(expr);
+ int oif = regs->data[priv->sreg_dev];
+
+ nf_dup_netdev_egress(pkt, oif);
+ regs->verdict.code = NF_DROP;
+}
+
+static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = {
+ [NFTA_FWD_SREG_DEV] = { .type = NLA_U32 },
+};
+
+static int nft_fwd_netdev_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_fwd_netdev *priv = nft_expr_priv(expr);
+
+ if (tb[NFTA_FWD_SREG_DEV] == NULL)
+ return -EINVAL;
+
+ priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]);
+ return nft_validate_register_load(priv->sreg_dev, sizeof(int));
+}
+
+static const struct nft_expr_ops nft_fwd_netdev_ingress_ops;
+
+static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_fwd_netdev *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_FWD_SREG_DEV, priv->sreg_dev))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_fwd_netdev_type;
+static const struct nft_expr_ops nft_fwd_netdev_ops = {
+ .type = &nft_fwd_netdev_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fwd_netdev)),
+ .eval = nft_fwd_netdev_eval,
+ .init = nft_fwd_netdev_init,
+ .dump = nft_fwd_netdev_dump,
+};
+
+static struct nft_expr_type nft_fwd_netdev_type __read_mostly = {
+ .family = NFPROTO_NETDEV,
+ .name = "fwd",
+ .ops = &nft_fwd_netdev_ops,
+ .policy = nft_fwd_netdev_policy,
+ .maxattr = NFTA_FWD_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_fwd_netdev_module_init(void)
+{
+ return nft_register_expr(&nft_fwd_netdev_type);
+}
+
+static void __exit nft_fwd_netdev_module_exit(void)
+{
+ nft_unregister_expr(&nft_fwd_netdev_type);
+}
+
+module_init(nft_fwd_netdev_module_init);
+module_exit(nft_fwd_netdev_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_AF_EXPR(5, "fwd");
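
The "fwd" expression is the forwarding counterpart: after handing the packet copy to nf_dup_netdev_egress() it sets the verdict to NF_DROP, so the original skb is discarded and the net effect is that the packet leaves through the selected device instead of continuing up the stack. Illustrative usage, again assuming matching nft userspace support: "nft add rule netdev filter ingress fwd to eth1".
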
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 5d67938f8b2f..99d18578afc6 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -26,6 +26,7 @@ struct nft_limit {
u64 rate;
u64 nsecs;
u32 burst;
+ bool invert;
};
static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
@@ -44,11 +45,11 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
if (delta >= 0) {
limit->tokens = delta;
spin_unlock_bh(&limit_lock);
- return false;
+ return limit->invert;
}
limit->tokens = tokens;
spin_unlock_bh(&limit_lock);
- return true;
+ return !limit->invert;
}
static int nft_limit_init(struct nft_limit *limit,
@@ -78,6 +79,12 @@ static int nft_limit_init(struct nft_limit *limit,
limit->rate = rate;
}
+ if (tb[NFTA_LIMIT_FLAGS]) {
+ u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS]));
+
+ if (flags & NFT_LIMIT_F_INV)
+ limit->invert = true;
+ }
limit->last = ktime_get_ns();
return 0;
@@ -86,13 +93,15 @@ static int nft_limit_init(struct nft_limit *limit,
static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit,
enum nft_limit_type type)
{
+ u32 flags = limit->invert ? NFT_LIMIT_F_INV : 0;
u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC);
u64 rate = limit->rate - limit->burst;
if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate)) ||
nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs)) ||
nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(limit->burst)) ||
- nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type)))
+ nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type)) ||
+ nla_put_be32(skb, NFTA_LIMIT_FLAGS, htonl(flags)))
goto nla_put_failure;
return 0;
@@ -120,6 +129,7 @@ static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
[NFTA_LIMIT_UNIT] = { .type = NLA_U64 },
[NFTA_LIMIT_BURST] = { .type = NLA_U32 },
[NFTA_LIMIT_TYPE] = { .type = NLA_U32 },
+ [NFTA_LIMIT_FLAGS] = { .type = NLA_U32 },
};
static int nft_limit_pkts_init(const struct nft_ctx *ctx,
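
The NFT_LIMIT_F_INV flag inverts the token-bucket result: nft_limit_eval() now returns true once the configured rate is exceeded rather than while traffic stays below it. In nft syntax this corresponds to the "over" keyword, e.g. "limit rate over 10 mbytes/second" to act only on traffic above the threshold (the rule text is illustrative and depends on the userspace release).
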
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 9aea747b43ea..81b5ad6165ac 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -17,7 +17,9 @@
#include <net/netfilter/nft_masq.h>
const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
- [NFTA_MASQ_FLAGS] = { .type = NLA_U32 },
+ [NFTA_MASQ_FLAGS] = { .type = NLA_U32 },
+ [NFTA_MASQ_REG_PROTO_MIN] = { .type = NLA_U32 },
+ [NFTA_MASQ_REG_PROTO_MAX] = { .type = NLA_U32 },
};
EXPORT_SYMBOL_GPL(nft_masq_policy);
@@ -40,6 +42,7 @@ int nft_masq_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
+ u32 plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
struct nft_masq *priv = nft_expr_priv(expr);
int err;
@@ -47,12 +50,32 @@ int nft_masq_init(const struct nft_ctx *ctx,
if (err)
return err;
- if (tb[NFTA_MASQ_FLAGS] == NULL)
- return 0;
-
- priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
- if (priv->flags & ~NF_NAT_RANGE_MASK)
- return -EINVAL;
+ if (tb[NFTA_MASQ_FLAGS]) {
+ priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
+ if (priv->flags & ~NF_NAT_RANGE_MASK)
+ return -EINVAL;
+ }
+
+ if (tb[NFTA_MASQ_REG_PROTO_MIN]) {
+ priv->sreg_proto_min =
+ nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MIN]);
+
+ err = nft_validate_register_load(priv->sreg_proto_min, plen);
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_MASQ_REG_PROTO_MAX]) {
+ priv->sreg_proto_max =
+ nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MAX]);
+
+ err = nft_validate_register_load(priv->sreg_proto_max,
+ plen);
+ if (err < 0)
+ return err;
+ } else {
+ priv->sreg_proto_max = priv->sreg_proto_min;
+ }
+ }
return 0;
}
@@ -62,12 +85,18 @@ int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_masq *priv = nft_expr_priv(expr);
- if (priv->flags == 0)
- return 0;
-
- if (nla_put_be32(skb, NFTA_MASQ_FLAGS, htonl(priv->flags)))
+ if (priv->flags != 0 &&
+ nla_put_be32(skb, NFTA_MASQ_FLAGS, htonl(priv->flags)))
goto nla_put_failure;
+ if (priv->sreg_proto_min) {
+ if (nft_dump_register(skb, NFTA_MASQ_REG_PROTO_MIN,
+ priv->sreg_proto_min) ||
+ nft_dump_register(skb, NFTA_MASQ_REG_PROTO_MAX,
+ priv->sreg_proto_max))
+ goto nla_put_failure;
+ }
+
return 0;
nla_put_failure:
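
nft_masq_init() can now load a source-port range from two registers. The family-specific eval functions that consume sreg_proto_min/max are not part of this diff; a hedged sketch of how they would typically be copied into an nf_nat_range (field and flag names taken from the existing NAT API):

/* Hedged sketch, not from this diff: per-family masq eval consuming the
 * new registers; the real code lives in the IPv4/IPv6 modules.
 */
	struct nf_nat_range range;

	memset(&range, 0, sizeof(range));
	range.flags = priv->flags;
	if (priv->sreg_proto_min) {
		range.min_proto.all =
			*(__be16 *)&regs->data[priv->sreg_proto_min];
		range.max_proto.all =
			*(__be16 *)&regs->data[priv->sreg_proto_max];
		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
	}
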
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index e4ad2c24bc41..16c50b0dd426 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -18,12 +18,18 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/smp.h>
+#include <linux/static_key.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nft_meta.h>
+#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */
+
+static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
+
void nft_meta_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -31,6 +37,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
const struct nft_meta *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
const struct net_device *in = pkt->in, *out = pkt->out;
+ struct sock *sk;
u32 *dest = &regs->data[priv->dreg];
switch (priv->key) {
@@ -86,33 +93,35 @@ void nft_meta_get_eval(const struct nft_expr *expr,
*(u16 *)dest = out->type;
break;
case NFT_META_SKUID:
- if (skb->sk == NULL || !sk_fullsock(skb->sk))
+ sk = skb_to_full_sk(skb);
+ if (!sk || !sk_fullsock(sk))
goto err;
- read_lock_bh(&skb->sk->sk_callback_lock);
- if (skb->sk->sk_socket == NULL ||
- skb->sk->sk_socket->file == NULL) {
- read_unlock_bh(&skb->sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
+ if (sk->sk_socket == NULL ||
+ sk->sk_socket->file == NULL) {
+ read_unlock_bh(&sk->sk_callback_lock);
goto err;
}
*dest = from_kuid_munged(&init_user_ns,
- skb->sk->sk_socket->file->f_cred->fsuid);
- read_unlock_bh(&skb->sk->sk_callback_lock);
+ sk->sk_socket->file->f_cred->fsuid);
+ read_unlock_bh(&sk->sk_callback_lock);
break;
case NFT_META_SKGID:
- if (skb->sk == NULL || !sk_fullsock(skb->sk))
+ sk = skb_to_full_sk(skb);
+ if (!sk || !sk_fullsock(sk))
goto err;
- read_lock_bh(&skb->sk->sk_callback_lock);
- if (skb->sk->sk_socket == NULL ||
- skb->sk->sk_socket->file == NULL) {
- read_unlock_bh(&skb->sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
+ if (sk->sk_socket == NULL ||
+ sk->sk_socket->file == NULL) {
+ read_unlock_bh(&sk->sk_callback_lock);
goto err;
}
*dest = from_kgid_munged(&init_user_ns,
- skb->sk->sk_socket->file->f_cred->fsgid);
- read_unlock_bh(&skb->sk->sk_callback_lock);
+ sk->sk_socket->file->f_cred->fsgid);
+ read_unlock_bh(&sk->sk_callback_lock);
break;
#ifdef CONFIG_IP_ROUTE_CLASSID
case NFT_META_RTCLASSID: {
@@ -168,11 +177,17 @@ void nft_meta_get_eval(const struct nft_expr *expr,
break;
#ifdef CONFIG_CGROUP_NET_CLASSID
case NFT_META_CGROUP:
- if (skb->sk == NULL || !sk_fullsock(skb->sk))
+ sk = skb_to_full_sk(skb);
+ if (!sk || !sk_fullsock(sk))
goto err;
- *dest = skb->sk->sk_classid;
+ *dest = sock_cgroup_classid(&sk->sk_cgrp_data);
break;
#endif
+ case NFT_META_PRANDOM: {
+ struct rnd_state *state = this_cpu_ptr(&nft_prandom_state);
+ *dest = prandom_u32_state(state);
+ break;
+ }
default:
WARN_ON(1);
goto err;
@@ -184,6 +199,13 @@ err:
}
EXPORT_SYMBOL_GPL(nft_meta_get_eval);
+/* don't change or set _LOOPBACK, _USER, etc. */
+static bool pkt_type_ok(u32 p)
+{
+ return p == PACKET_HOST || p == PACKET_BROADCAST ||
+ p == PACKET_MULTICAST || p == PACKET_OTHERHOST;
+}
+
void nft_meta_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -199,6 +221,11 @@ void nft_meta_set_eval(const struct nft_expr *expr,
case NFT_META_PRIORITY:
skb->priority = value;
break;
+ case NFT_META_PKTTYPE:
+ if (skb->pkt_type != value &&
+ pkt_type_ok(value) && pkt_type_ok(skb->pkt_type))
+ skb->pkt_type = value;
+ break;
case NFT_META_NFTRACE:
skb->nf_trace = 1;
break;
@@ -257,6 +284,10 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
case NFT_META_OIFNAME:
len = IFNAMSIZ;
break;
+ case NFT_META_PRANDOM:
+ prandom_init_once(&nft_prandom_state);
+ len = sizeof(u32);
+ break;
default:
return -EOPNOTSUPP;
}
@@ -267,6 +298,24 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
}
EXPORT_SYMBOL_GPL(nft_meta_get_init);
+static int nft_meta_set_init_pkttype(const struct nft_ctx *ctx)
+{
+ unsigned int hooks;
+
+ switch (ctx->afi->family) {
+ case NFPROTO_BRIDGE:
+ hooks = 1 << NF_BR_PRE_ROUTING;
+ break;
+ case NFPROTO_NETDEV:
+ hooks = 1 << NF_NETDEV_INGRESS;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
int nft_meta_set_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -284,6 +333,12 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
case NFT_META_NFTRACE:
len = sizeof(u8);
break;
+ case NFT_META_PKTTYPE:
+ err = nft_meta_set_init_pkttype(ctx);
+ if (err)
+ return err;
+ len = sizeof(u8);
+ break;
default:
return -EOPNOTSUPP;
}
@@ -293,6 +348,9 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
+ if (priv->key == NFT_META_NFTRACE)
+ static_branch_inc(&nft_trace_enabled);
+
return 0;
}
EXPORT_SYMBOL_GPL(nft_meta_set_init);
@@ -330,6 +388,16 @@ nla_put_failure:
}
EXPORT_SYMBOL_GPL(nft_meta_set_dump);
+void nft_meta_set_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+
+ if (priv->key == NFT_META_NFTRACE)
+ static_branch_dec(&nft_trace_enabled);
+}
+EXPORT_SYMBOL_GPL(nft_meta_set_destroy);
+
static struct nft_expr_type nft_meta_type;
static const struct nft_expr_ops nft_meta_get_ops = {
.type = &nft_meta_type,
@@ -344,6 +412,7 @@ static const struct nft_expr_ops nft_meta_set_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_set_eval,
.init = nft_meta_set_init,
+ .destroy = nft_meta_set_destroy,
.dump = nft_meta_set_dump,
};
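
Two additions stand out here: NFT_META_PRANDOM fills the destination register with a per-cpu pseudo-random 32-bit value (the state is seeded lazily through prandom_init_once()), which enables probabilistic matching; userspace typically exposes it as "meta random" combined with a comparison, though the exact syntax depends on the nftables release. The writable PKTTYPE key is restricted to bridge prerouting and netdev ingress chains via nft_chain_validate_hooks(), and pkt_type_ok() limits the rewrite to the plain link-layer packet types.
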
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 09b4b07eb676..12cd4bf16d17 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -107,10 +107,13 @@ err:
}
static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
- [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 },
- [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 },
- [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 },
- [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_SREG] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 },
};
static int nft_payload_init(const struct nft_ctx *ctx,
@@ -160,6 +163,118 @@ const struct nft_expr_ops nft_payload_fast_ops = {
.dump = nft_payload_dump,
};
+static void nft_payload_set_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_payload_set *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ const u32 *src = &regs->data[priv->sreg];
+ int offset, csum_offset;
+ __wsum fsum, tsum;
+ __sum16 sum;
+
+ switch (priv->base) {
+ case NFT_PAYLOAD_LL_HEADER:
+ if (!skb_mac_header_was_set(skb))
+ goto err;
+ offset = skb_mac_header(skb) - skb->data;
+ break;
+ case NFT_PAYLOAD_NETWORK_HEADER:
+ offset = skb_network_offset(skb);
+ break;
+ case NFT_PAYLOAD_TRANSPORT_HEADER:
+ offset = pkt->xt.thoff;
+ break;
+ default:
+ BUG();
+ }
+
+ csum_offset = offset + priv->csum_offset;
+ offset += priv->offset;
+
+ if (priv->csum_type == NFT_PAYLOAD_CSUM_INET &&
+ (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER ||
+ skb->ip_summed != CHECKSUM_PARTIAL)) {
+ if (skb_copy_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
+ goto err;
+
+ fsum = skb_checksum(skb, offset, priv->len, 0);
+ tsum = csum_partial(src, priv->len, 0);
+ sum = csum_fold(csum_add(csum_sub(~csum_unfold(sum), fsum),
+ tsum));
+ if (sum == 0)
+ sum = CSUM_MANGLED_0;
+
+ if (!skb_make_writable(skb, csum_offset + sizeof(sum)) ||
+ skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
+ goto err;
+ }
+
+ if (!skb_make_writable(skb, max(offset + priv->len, 0)) ||
+ skb_store_bits(skb, offset, src, priv->len) < 0)
+ goto err;
+
+ return;
+err:
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_payload_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_payload_set *priv = nft_expr_priv(expr);
+
+ priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+ priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+ priv->sreg = nft_parse_register(tb[NFTA_PAYLOAD_SREG]);
+
+ if (tb[NFTA_PAYLOAD_CSUM_TYPE])
+ priv->csum_type =
+ ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
+ if (tb[NFTA_PAYLOAD_CSUM_OFFSET])
+ priv->csum_offset =
+ ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET]));
+
+ switch (priv->csum_type) {
+ case NFT_PAYLOAD_CSUM_NONE:
+ case NFT_PAYLOAD_CSUM_INET:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_validate_register_load(priv->sreg, priv->len);
+}
+
+static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_payload_set *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_PAYLOAD_SREG, priv->sreg) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_CSUM_TYPE, htonl(priv->csum_type)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_CSUM_OFFSET,
+ htonl(priv->csum_offset)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nft_expr_ops nft_payload_set_ops = {
+ .type = &nft_payload_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_payload_set)),
+ .eval = nft_payload_set_eval,
+ .init = nft_payload_set_init,
+ .dump = nft_payload_set_dump,
+};
+
static const struct nft_expr_ops *
nft_payload_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -167,8 +282,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
enum nft_payload_bases base;
unsigned int offset, len;
- if (tb[NFTA_PAYLOAD_DREG] == NULL ||
- tb[NFTA_PAYLOAD_BASE] == NULL ||
+ if (tb[NFTA_PAYLOAD_BASE] == NULL ||
tb[NFTA_PAYLOAD_OFFSET] == NULL ||
tb[NFTA_PAYLOAD_LEN] == NULL)
return ERR_PTR(-EINVAL);
@@ -183,6 +297,15 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
return ERR_PTR(-EOPNOTSUPP);
}
+ if (tb[NFTA_PAYLOAD_SREG] != NULL) {
+ if (tb[NFTA_PAYLOAD_DREG] != NULL)
+ return ERR_PTR(-EINVAL);
+ return &nft_payload_set_ops;
+ }
+
+ if (tb[NFTA_PAYLOAD_DREG] == NULL)
+ return ERR_PTR(-EINVAL);
+
offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
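
nft_payload_set_eval() adjusts the stored checksum incrementally: fsum is the ones'-complement sum over the bytes being replaced, tsum the sum over the replacement, and the new checksum is folded from ~sum - fsum + tsum, the standard RFC 1624 update. A standalone sketch for a single 16-bit field, illustrative only and not kernel code:

#include <stdint.h>

/* Incremental update of a 16-bit Internet checksum when one 16-bit word
 * (network byte order) changes from 'old' to 'new' (RFC 1624, eqn. 3).
 */
static uint16_t csum_update16(uint16_t check, uint16_t old, uint16_t new)
{
	uint32_t sum = (uint16_t)~check + (uint16_t)~old + new;

	sum = (sum & 0xffff) + (sum >> 16);	/* fold carries back in */
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}
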
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index d4aaad747ea9..582c9cfd6567 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -26,6 +26,7 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/audit.h>
+#include <linux/user_namespace.h>
#include <net/net_namespace.h>
#include <linux/netfilter/x_tables.h>
@@ -658,6 +659,9 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
struct xt_table_info *info = NULL;
size_t sz = sizeof(*info) + size;
+ if (sz < sizeof(*info))
+ return NULL;
+
/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
return NULL;
@@ -693,12 +697,45 @@ EXPORT_SYMBOL(xt_free_table_info);
struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
const char *name)
{
- struct xt_table *t;
+ struct xt_table *t, *found = NULL;
mutex_lock(&xt[af].mutex);
list_for_each_entry(t, &net->xt.tables[af], list)
if (strcmp(t->name, name) == 0 && try_module_get(t->me))
return t;
+
+ if (net == &init_net)
+ goto out;
+
+ /* Table doesn't exist in this netns, re-try init */
+ list_for_each_entry(t, &init_net.xt.tables[af], list) {
+ if (strcmp(t->name, name))
+ continue;
+ if (!try_module_get(t->me))
+ return NULL;
+
+ mutex_unlock(&xt[af].mutex);
+ if (t->table_init(net) != 0) {
+ module_put(t->me);
+ return NULL;
+ }
+
+ found = t;
+
+ mutex_lock(&xt[af].mutex);
+ break;
+ }
+
+ if (!found)
+ goto out;
+
+ /* and once again: */
+ list_for_each_entry(t, &net->xt.tables[af], list)
+ if (strcmp(t->name, name) == 0)
+ return t;
+
+ module_put(found->me);
+ out:
mutex_unlock(&xt[af].mutex);
return NULL;
}
@@ -1169,20 +1206,20 @@ static const struct file_operations xt_target_ops = {
#endif /* CONFIG_PROC_FS */
/**
- * xt_hook_link - set up hooks for a new table
+ * xt_hook_ops_alloc - set up hooks for a new table
* @table: table with metadata needed to set up hooks
* @fn: Hook function
*
- * This function will take care of creating and registering the necessary
- * Netfilter hooks for XT tables.
+ * This function will create the nf_hook_ops that the x_table needs
+ * to hand to xt_hook_link_net().
*/
-struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn)
+struct nf_hook_ops *
+xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn)
{
unsigned int hook_mask = table->valid_hooks;
uint8_t i, num_hooks = hweight32(hook_mask);
uint8_t hooknum;
struct nf_hook_ops *ops;
- int ret;
ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL);
if (ops == NULL)
@@ -1199,33 +1236,17 @@ struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn)
++i;
}
- ret = nf_register_hooks(ops, num_hooks);
- if (ret < 0) {
- kfree(ops);
- return ERR_PTR(ret);
- }
-
return ops;
}
-EXPORT_SYMBOL_GPL(xt_hook_link);
-
-/**
- * xt_hook_unlink - remove hooks for a table
- * @ops: nf_hook_ops array as returned by nf_hook_link
- * @hook_mask: the very same mask that was passed to nf_hook_link
- */
-void xt_hook_unlink(const struct xt_table *table, struct nf_hook_ops *ops)
-{
- nf_unregister_hooks(ops, hweight32(table->valid_hooks));
- kfree(ops);
-}
-EXPORT_SYMBOL_GPL(xt_hook_unlink);
+EXPORT_SYMBOL_GPL(xt_hook_ops_alloc);
int xt_proto_init(struct net *net, u_int8_t af)
{
#ifdef CONFIG_PROC_FS
char buf[XT_FUNCTION_MAXNAMELEN];
struct proc_dir_entry *proc;
+ kuid_t root_uid;
+ kgid_t root_gid;
#endif
if (af >= ARRAY_SIZE(xt_prefix))
@@ -1233,12 +1254,17 @@ int xt_proto_init(struct net *net, u_int8_t af)
#ifdef CONFIG_PROC_FS
+ root_uid = make_kuid(net->user_ns, 0);
+ root_gid = make_kgid(net->user_ns, 0);
+
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TABLES, sizeof(buf));
proc = proc_create_data(buf, 0440, net->proc_net, &xt_table_ops,
(void *)(unsigned long)af);
if (!proc)
goto out;
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
@@ -1246,6 +1272,8 @@ int xt_proto_init(struct net *net, u_int8_t af)
(void *)(unsigned long)af);
if (!proc)
goto out_remove_tables;
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TARGETS, sizeof(buf));
@@ -1253,6 +1281,8 @@ int xt_proto_init(struct net *net, u_int8_t af)
(void *)(unsigned long)af);
if (!proc)
goto out_remove_matches;
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
#endif
return 0;
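
xt_hook_link() no longer registers hooks at allocation time: xt_hook_ops_alloc() only builds the nf_hook_ops array, and hooking a table into a namespace becomes a separate per-net step (the xt_hook_link_net() helper mentioned in the comment above is not in this hunk). A heavily hedged sketch of how a table module might use the split API; every name other than xt_hook_ops_alloc() is an assumption:

/* Hedged sketch only: the registration side is not shown in this diff. */
static struct nf_hook_ops *filter_ops __read_mostly;

static int __init iptable_filter_init(void)
{
	int ret;

	filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook);
	if (IS_ERR(filter_ops))
		return PTR_ERR(filter_ops);

	ret = register_pernet_subsys(&iptable_filter_net_ops);
	if (ret < 0)
		kfree(filter_ops);

	return ret;
}
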
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index e7ac07e53b59..6669e68d589e 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -143,7 +143,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
goto out;
}
- timeout = timeout_find_get(timeout_name);
+ timeout = timeout_find_get(par->net, timeout_name);
if (timeout == NULL) {
ret = -ENOENT;
pr_info("No such timeout policy \"%s\"\n", timeout_name);
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index b7c43def0dc6..e118397254af 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -228,7 +228,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
u8 nexthdr;
- __be16 frag_off;
+ __be16 frag_off, oldlen, newlen;
int tcphoff;
int ret;
@@ -244,7 +244,12 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
return NF_DROP;
if (ret > 0) {
ipv6h = ipv6_hdr(skb);
- ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
+ oldlen = ipv6h->payload_len;
+ newlen = htons(ntohs(oldlen) + ret);
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_add(csum_sub(skb->csum, oldlen),
+ newlen);
+ ipv6h->payload_len = newlen;
}
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 899b06115fc5..6e57a3966dc5 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -31,19 +31,21 @@ static unsigned int
tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tee_tginfo *info = par->targinfo;
+ int oif = info->priv ? info->priv->oif : 0;
- nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, info->priv->oif);
+ nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, oif);
return XT_CONTINUE;
}
-#if IS_ENABLED(CONFIG_NF_DUP_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
static unsigned int
tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tee_tginfo *info = par->targinfo;
+ int oif = info->priv ? info->priv->oif : 0;
- nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, info->priv->oif);
+ nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, oif);
return XT_CONTINUE;
}
@@ -129,7 +131,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
.destroy = tee_tg_destroy,
.me = THIS_MODULE,
},
-#if IS_ENABLED(CONFIG_NF_DUP_IPV6)
+#if IS_ENABLED(CONFIG_IPV6)
{
.name = "TEE",
.revision = 1,
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 3ab591e73ec0..7f4414d26a66 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -105,19 +105,24 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
* belonging to established connections going through that one.
*/
static inline struct sock *
-nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
+nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
+ const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
+ struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
- sk = inet_lookup_listener(net, &tcp_hashinfo,
+ tcph = hp;
+ sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
+ ip_hdrlen(skb) +
+ __tcp_hdrlen(tcph),
saddr, sport,
daddr, dport,
in->ifindex);
@@ -169,19 +174,23 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
#ifdef XT_TPROXY_HAVE_IPV6
static inline struct sock *
-nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
+nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
+ const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
+ struct tcphdr *tcph;
switch (protocol) {
case IPPROTO_TCP:
switch (lookup_type) {
case NFT_LOOKUP_LISTENER:
- sk = inet6_lookup_listener(net, &tcp_hashinfo,
+ tcph = hp;
+ sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
+ thoff + __tcp_hdrlen(tcph),
saddr, sport,
daddr, ntohs(dport),
in->ifindex);
@@ -267,7 +276,7 @@ tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v4(net, iph->protocol,
+ sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, laddr ? laddr : iph->daddr,
hp->source, lport ? lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
@@ -305,7 +314,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v4(net, iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, iph->daddr,
hp->source, hp->dest,
skb->dev, NFT_LOOKUP_ESTABLISHED);
@@ -321,7 +330,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
else if (!sk)
/* no, there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v4(net, iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, laddr,
hp->source, lport,
skb->dev, NFT_LOOKUP_LISTENER);
@@ -429,7 +438,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v6(par->net, tproto,
+ sk2 = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
&iph->saddr,
tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
hp->source,
@@ -472,7 +481,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v6(par->net, tproto,
+ sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
&iph->saddr, &iph->daddr,
hp->source, hp->dest,
par->in, NFT_LOOKUP_ESTABLISHED);
@@ -487,8 +496,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
else if (!sk)
/* no there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v6(par->net, tproto,
- &iph->saddr, laddr,
+ sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp,
+ tproto, &iph->saddr, laddr,
hp->source, lport,
par->in, NFT_LOOKUP_LISTENER);
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index a1d126f29463..a086a914865f 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -24,9 +24,9 @@ MODULE_DESCRIPTION("Xtables: process control group matching");
MODULE_ALIAS("ipt_cgroup");
MODULE_ALIAS("ip6t_cgroup");
-static int cgroup_mt_check(const struct xt_mtchk_param *par)
+static int cgroup_mt_check_v0(const struct xt_mtchk_param *par)
{
- struct xt_cgroup_info *info = par->matchinfo;
+ struct xt_cgroup_info_v0 *info = par->matchinfo;
if (info->invert & ~1)
return -EINVAL;
@@ -34,38 +34,110 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par)
return 0;
}
+static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
+{
+ struct xt_cgroup_info_v1 *info = par->matchinfo;
+ struct cgroup *cgrp;
+
+ if ((info->invert_path & ~1) || (info->invert_classid & ~1))
+ return -EINVAL;
+
+ if (!info->has_path && !info->has_classid) {
+ pr_info("xt_cgroup: no path or classid specified\n");
+ return -EINVAL;
+ }
+
+ if (info->has_path && info->has_classid) {
+ pr_info("xt_cgroup: both path and classid specified\n");
+ return -EINVAL;
+ }
+
+ if (info->has_path) {
+ cgrp = cgroup_get_from_path(info->path);
+ if (IS_ERR(cgrp)) {
+ pr_info("xt_cgroup: invalid path, errno=%ld\n",
+ PTR_ERR(cgrp));
+ return -EINVAL;
+ }
+ info->priv = cgrp;
+ }
+
+ return 0;
+}
+
static bool
-cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
+cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
{
- const struct xt_cgroup_info *info = par->matchinfo;
+ const struct xt_cgroup_info_v0 *info = par->matchinfo;
if (skb->sk == NULL || !sk_fullsock(skb->sk))
return false;
- return (info->id == skb->sk->sk_classid) ^ info->invert;
+ return (info->id == sock_cgroup_classid(&skb->sk->sk_cgrp_data)) ^
+ info->invert;
+}
+
+static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_cgroup_info_v1 *info = par->matchinfo;
+ struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data;
+ struct cgroup *ancestor = info->priv;
+
+ if (!skb->sk || !sk_fullsock(skb->sk))
+ return false;
+
+ if (ancestor)
+ return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^
+ info->invert_path;
+ else
+ return (info->classid == sock_cgroup_classid(skcd)) ^
+ info->invert_classid;
+}
+
+static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par)
+{
+ struct xt_cgroup_info_v1 *info = par->matchinfo;
+
+ if (info->priv)
+ cgroup_put(info->priv);
}
-static struct xt_match cgroup_mt_reg __read_mostly = {
- .name = "cgroup",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .checkentry = cgroup_mt_check,
- .match = cgroup_mt,
- .matchsize = sizeof(struct xt_cgroup_info),
- .me = THIS_MODULE,
- .hooks = (1 << NF_INET_LOCAL_OUT) |
- (1 << NF_INET_POST_ROUTING) |
- (1 << NF_INET_LOCAL_IN),
+static struct xt_match cgroup_mt_reg[] __read_mostly = {
+ {
+ .name = "cgroup",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = cgroup_mt_check_v0,
+ .match = cgroup_mt_v0,
+ .matchsize = sizeof(struct xt_cgroup_info_v0),
+ .me = THIS_MODULE,
+ .hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ },
+ {
+ .name = "cgroup",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = cgroup_mt_check_v1,
+ .match = cgroup_mt_v1,
+ .matchsize = sizeof(struct xt_cgroup_info_v1),
+ .destroy = cgroup_mt_destroy_v1,
+ .me = THIS_MODULE,
+ .hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ },
};
static int __init cgroup_mt_init(void)
{
- return xt_register_match(&cgroup_mt_reg);
+ return xt_register_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg));
}
static void __exit cgroup_mt_exit(void)
{
- xt_unregister_match(&cgroup_mt_reg);
+ xt_unregister_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg));
}
module_init(cgroup_mt_init);
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index df8801e02a32..2455b69b5810 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -61,8 +61,8 @@ static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = {
[OSF_ATTR_FINGER] = { .len = sizeof(struct xt_osf_user_finger) },
};
-static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int xt_osf_add_callback(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const osf_attrs[])
{
struct xt_osf_user_finger *f;
@@ -104,7 +104,8 @@ static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb,
return err;
}
-static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb,
+static int xt_osf_remove_callback(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const osf_attrs[])
{
@@ -261,7 +262,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
if (f->opt[optnum].kind == (*optp)) {
__u32 len = f->opt[optnum].length;
const __u8 *optend = optp + len;
- int loop_cont = 0;
fmatch = FMATCH_OK;
@@ -274,7 +274,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
mss = ntohs((__force __be16)mss);
break;
case OSFOPT_TS:
- loop_cont = 1;
break;
}
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index ca2e577ed8ac..1302b475abcb 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -14,6 +14,7 @@
#include <linux/skbuff.h>
#include <linux/file.h>
#include <net/sock.h>
+#include <net/inet_sock.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_owner.h>
@@ -33,8 +34,9 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_owner_match_info *info = par->matchinfo;
const struct file *filp;
+ struct sock *sk = skb_to_full_sk(skb);
- if (skb->sk == NULL || skb->sk->sk_socket == NULL)
+ if (sk == NULL || sk->sk_socket == NULL)
return (info->match ^ info->invert) == 0;
else if (info->match & info->invert & XT_OWNER_SOCKET)
/*
@@ -43,7 +45,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
*/
return false;
- filp = skb->sk->sk_socket->file;
+ filp = sk->sk_socket->file;
if (filp == NULL)
return ((info->match ^ info->invert) &
(XT_OWNER_UID | XT_OWNER_GID)) == 0;
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 2ec08f04b816..49d14ecad444 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -112,14 +112,15 @@ extract_icmp4_fields(const struct sk_buff *skb,
* box.
*/
static struct sock *
-xt_socket_get_sock_v4(struct net *net, const u8 protocol,
+xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
+ const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in)
{
switch (protocol) {
case IPPROTO_TCP:
- return __inet_lookup(net, &tcp_hashinfo,
+ return __inet_lookup(net, &tcp_hashinfo, skb, doff,
saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
@@ -148,6 +149,8 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net,
const struct net_device *indev)
{
const struct iphdr *iph = ip_hdr(skb);
+ struct sk_buff *data_skb = NULL;
+ int doff = 0;
__be32 uninitialized_var(daddr), uninitialized_var(saddr);
__be16 uninitialized_var(dport), uninitialized_var(sport);
u8 uninitialized_var(protocol);
@@ -169,6 +172,10 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net,
sport = hp->source;
daddr = iph->daddr;
dport = hp->dest;
+ data_skb = (struct sk_buff *)skb;
+ doff = iph->protocol == IPPROTO_TCP ?
+ ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) :
+ ip_hdrlen(skb) + sizeof(*hp);
} else if (iph->protocol == IPPROTO_ICMP) {
if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
@@ -198,8 +205,8 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net,
}
#endif
- return xt_socket_get_sock_v4(net, protocol, saddr, daddr,
- sport, dport, indev);
+ return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr,
+ daddr, sport, dport, indev);
}
static bool
@@ -318,14 +325,15 @@ extract_icmp6_fields(const struct sk_buff *skb,
}
static struct sock *
-xt_socket_get_sock_v6(struct net *net, const u8 protocol,
+xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
+ const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
const struct net_device *in)
{
switch (protocol) {
case IPPROTO_TCP:
- return inet6_lookup(net, &tcp_hashinfo,
+ return inet6_lookup(net, &tcp_hashinfo, skb, doff,
saddr, sport, daddr, dport,
in->ifindex);
case IPPROTO_UDP:
@@ -343,6 +351,8 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net,
__be16 uninitialized_var(dport), uninitialized_var(sport);
const struct in6_addr *daddr = NULL, *saddr = NULL;
struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct sk_buff *data_skb = NULL;
+ int doff = 0;
int thoff = 0, tproto;
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
@@ -362,6 +372,10 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net,
sport = hp->source;
daddr = &iph->daddr;
dport = hp->dest;
+ data_skb = (struct sk_buff *)skb;
+ doff = tproto == IPPROTO_TCP ?
+ thoff + __tcp_hdrlen((struct tcphdr *)hp) :
+ thoff + sizeof(*hp);
} else if (tproto == IPPROTO_ICMPV6) {
struct ipv6hdr ipv6_var;
@@ -373,7 +387,7 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net,
return NULL;
}
- return xt_socket_get_sock_v6(net, tproto, saddr, daddr,
+ return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr,
sport, dport, indev);
}