From 7064d16e162adf8199f0288b694e6af823ed5431 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 24 May 2012 10:34:21 -0600 Subject: userns: Use kgids for sysctl_ping_group_range - Store sysctl_ping_group_range as a paire of kgid_t values instead of a pair of gid_t values. - Move the kgid conversion work from ping_init_sock into ipv4_ping_group_range - For invalid cases reset to the default disabled state. With the kgid_t conversion made part of the original value sanitation from userspace understand how the code will react becomes clearer and it becomes possible to set the sysctl ping group range from something other than the initial user namespace. Cc: Vasiliy Kulikov Acked-by: David S. Miller Signed-off-by: Eric W. Biederman --- include/net/netns/ipv4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net/netns') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 1474dd65c66f..3516dc0cc615 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -5,6 +5,7 @@ #ifndef __NETNS_IPV4_H__ #define __NETNS_IPV4_H__ +#include #include struct tcpm_hash_bucket; @@ -62,7 +63,7 @@ struct netns_ipv4 { int sysctl_icmp_ratemask; int sysctl_icmp_errors_use_inbound_ifaddr; - unsigned int sysctl_ping_group_range[2]; + kgid_t sysctl_ping_group_range[2]; long sysctl_tcp_mem[3]; atomic_t rt_genid; -- cgit v1.2.3 From 4db67e808640e3934d82ce61ee8e2e89fd877ba8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 6 Aug 2012 08:42:04 +0000 Subject: sctp: Make the address lists per network namespace - Move the address lists into struct net - Add per network namespace initialization and cleanup - Pass around struct net so it is everywhere I need it. - Rename all of the global variable references into references to the variables moved into struct net Signed-off-by: "Eric W. Biederman" Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/net_namespace.h | 4 ++ include/net/netns/sctp.h | 21 +++++++ include/net/sctp/sctp.h | 4 +- include/net/sctp/structs.h | 22 +------ net/sctp/associola.c | 3 +- net/sctp/bind_addr.c | 14 ++--- net/sctp/ipv6.c | 17 +++--- net/sctp/protocol.c | 143 +++++++++++++++++++++++++------------------- net/sctp/socket.c | 7 ++- 9 files changed, 132 insertions(+), 103 deletions(-) create mode 100644 include/net/netns/sctp.h (limited to 'include/net/netns') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 97e441945c13..5ae57f1ab755 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) @@ -81,6 +82,9 @@ struct net { #if IS_ENABLED(CONFIG_IPV6) struct netns_ipv6 ipv6; #endif +#if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE) + struct netns_sctp sctp; +#endif #if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) struct netns_dccp dccp; #endif diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h new file mode 100644 index 000000000000..cbd684e01cf7 --- /dev/null +++ b/include/net/netns/sctp.h @@ -0,0 +1,21 @@ +#ifndef __NETNS_SCTP_H__ +#define __NETNS_SCTP_H__ + +struct netns_sctp { + /* This is the global local address list. + * We actively maintain this complete list of addresses on + * the system by catching address add/delete events. + * + * It is a list of sctp_sockaddr_entry. + */ + struct list_head local_addr_list; + struct list_head addr_waitq; + struct timer_list addr_wq_timer; + struct list_head auto_asconf_splist; + spinlock_t addr_wq_lock; + + /* Lock that protects the local_addr_list writers */ + spinlock_t local_addr_lock; +}; + +#endif /* __NETNS_SCTP_H__ */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 640915a0613d..00c920537d4a 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -115,12 +115,12 @@ * sctp/protocol.c */ extern struct sock *sctp_get_ctl_sock(void); -extern int sctp_copy_local_addr_list(struct sctp_bind_addr *, +extern int sctp_copy_local_addr_list(struct net *, struct sctp_bind_addr *, sctp_scope_t, gfp_t gfp, int flags); extern struct sctp_pf *sctp_get_pf_specific(sa_family_t family); extern int sctp_register_pf(struct sctp_pf *, sa_family_t); -extern void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *, int); +extern void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int); /* * sctp/socket.c diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index c0563d1dd7c7..6bdfcabe560e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -205,21 +205,7 @@ extern struct sctp_globals { int port_hashsize; struct sctp_bind_hashbucket *port_hashtable; - /* This is the global local address list. - * We actively maintain this complete list of addresses on - * the system by catching address add/delete events. - * - * It is a list of sctp_sockaddr_entry. - */ - struct list_head local_addr_list; int default_auto_asconf; - struct list_head addr_waitq; - struct timer_list addr_wq_timer; - struct list_head auto_asconf_splist; - spinlock_t addr_wq_lock; - - /* Lock that protects the local_addr_list writers */ - spinlock_t addr_list_lock; /* Flag to indicate if addip is enabled. */ int addip_enable; @@ -278,12 +264,6 @@ extern struct sctp_globals { #define sctp_assoc_hashtable (sctp_globals.assoc_hashtable) #define sctp_port_hashsize (sctp_globals.port_hashsize) #define sctp_port_hashtable (sctp_globals.port_hashtable) -#define sctp_local_addr_list (sctp_globals.local_addr_list) -#define sctp_local_addr_lock (sctp_globals.addr_list_lock) -#define sctp_auto_asconf_splist (sctp_globals.auto_asconf_splist) -#define sctp_addr_waitq (sctp_globals.addr_waitq) -#define sctp_addr_wq_timer (sctp_globals.addr_wq_timer) -#define sctp_addr_wq_lock (sctp_globals.addr_wq_lock) #define sctp_default_auto_asconf (sctp_globals.default_auto_asconf) #define sctp_scope_policy (sctp_globals.ipv4_scope_policy) #define sctp_addip_enable (sctp_globals.addip_enable) @@ -1241,7 +1221,7 @@ struct sctp_bind_addr { void sctp_bind_addr_init(struct sctp_bind_addr *, __u16 port); void sctp_bind_addr_free(struct sctp_bind_addr *); -int sctp_bind_addr_copy(struct sctp_bind_addr *dest, +int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, sctp_scope_t scope, gfp_t gfp, int flags); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index a3601f35ac15..ed4930b31341 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1544,7 +1544,8 @@ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, if (asoc->peer.ipv6_address) flags |= SCTP_ADDR6_PEERSUPP; - return sctp_bind_addr_copy(&asoc->base.bind_addr, + return sctp_bind_addr_copy(sock_net(asoc->base.sk), + &asoc->base.bind_addr, &asoc->ep->base.bind_addr, scope, gfp, flags); } diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 4ece451c8d27..a85ce4b3e574 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -52,8 +52,8 @@ #include /* Forward declarations for internal helpers. */ -static int sctp_copy_one_addr(struct sctp_bind_addr *, union sctp_addr *, - sctp_scope_t scope, gfp_t gfp, +static int sctp_copy_one_addr(struct net *, struct sctp_bind_addr *, + union sctp_addr *, sctp_scope_t scope, gfp_t gfp, int flags); static void sctp_bind_addr_clean(struct sctp_bind_addr *); @@ -62,7 +62,7 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *); /* Copy 'src' to 'dest' taking 'scope' into account. Omit addresses * in 'src' which have a broader scope than 'scope'. */ -int sctp_bind_addr_copy(struct sctp_bind_addr *dest, +int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, sctp_scope_t scope, gfp_t gfp, int flags) @@ -75,7 +75,7 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest, /* Extract the addresses which are relevant for this scope. */ list_for_each_entry(addr, &src->address_list, list) { - error = sctp_copy_one_addr(dest, &addr->a, scope, + error = sctp_copy_one_addr(net, dest, &addr->a, scope, gfp, flags); if (error < 0) goto out; @@ -87,7 +87,7 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest, */ if (list_empty(&dest->address_list) && (SCTP_SCOPE_GLOBAL == scope)) { list_for_each_entry(addr, &src->address_list, list) { - error = sctp_copy_one_addr(dest, &addr->a, + error = sctp_copy_one_addr(net, dest, &addr->a, SCTP_SCOPE_LINK, gfp, flags); if (error < 0) @@ -448,7 +448,7 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, } /* Copy out addresses from the global local address list. */ -static int sctp_copy_one_addr(struct sctp_bind_addr *dest, +static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest, union sctp_addr *addr, sctp_scope_t scope, gfp_t gfp, int flags) @@ -456,7 +456,7 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, int error = 0; if (sctp_is_any(NULL, addr)) { - error = sctp_copy_local_addr_list(dest, scope, gfp, flags); + error = sctp_copy_local_addr_list(net, dest, scope, gfp, flags); } else if (sctp_in_scope(addr, scope)) { /* Now that the address is in scope, check to see if * the address type is supported by local sock as diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 2165a7ed25f1..bbf15341eb2b 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -99,6 +99,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; struct sctp_sockaddr_entry *addr = NULL; struct sctp_sockaddr_entry *temp; + struct net *net = dev_net(ifa->idev->dev); int found = 0; switch (ev) { @@ -110,27 +111,27 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, addr->a.v6.sin6_addr = ifa->addr; addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; addr->valid = 1; - spin_lock_bh(&sctp_local_addr_lock); - list_add_tail_rcu(&addr->list, &sctp_local_addr_list); - sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW); - spin_unlock_bh(&sctp_local_addr_lock); + spin_lock_bh(&net->sctp.local_addr_lock); + list_add_tail_rcu(&addr->list, &net->sctp.local_addr_list); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_NEW); + spin_unlock_bh(&net->sctp.local_addr_lock); } break; case NETDEV_DOWN: - spin_lock_bh(&sctp_local_addr_lock); + spin_lock_bh(&net->sctp.local_addr_lock); list_for_each_entry_safe(addr, temp, - &sctp_local_addr_list, list) { + &net->sctp.local_addr_list, list) { if (addr->a.sa.sa_family == AF_INET6 && ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) { - sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); break; } } - spin_unlock_bh(&sctp_local_addr_lock); + spin_unlock_bh(&net->sctp.local_addr_lock); if (found) kfree_rcu(addr, rcu); break; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1f89c4e69645..7025d96bae5f 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -201,29 +201,29 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, /* Extract our IP addresses from the system and stash them in the * protocol structure. */ -static void sctp_get_local_addr_list(void) +static void sctp_get_local_addr_list(struct net *net) { struct net_device *dev; struct list_head *pos; struct sctp_af *af; rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { + for_each_netdev_rcu(net, dev) { __list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); - af->copy_addrlist(&sctp_local_addr_list, dev); + af->copy_addrlist(&net->sctp.local_addr_list, dev); } } rcu_read_unlock(); } /* Free the existing local addresses. */ -static void sctp_free_local_addr_list(void) +static void sctp_free_local_addr_list(struct net *net) { struct sctp_sockaddr_entry *addr; struct list_head *pos, *temp; - list_for_each_safe(pos, temp, &sctp_local_addr_list) { + list_for_each_safe(pos, temp, &net->sctp.local_addr_list) { addr = list_entry(pos, struct sctp_sockaddr_entry, list); list_del(pos); kfree(addr); @@ -231,14 +231,14 @@ static void sctp_free_local_addr_list(void) } /* Copy the local addresses which are valid for 'scope' into 'bp'. */ -int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, - gfp_t gfp, int copy_flags) +int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp, + sctp_scope_t scope, gfp_t gfp, int copy_flags) { struct sctp_sockaddr_entry *addr; int error = 0; rcu_read_lock(); - list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) { if (!addr->valid) continue; if (sctp_in_scope(&addr->a, scope)) { @@ -627,14 +627,15 @@ static void sctp_v4_ecn_capable(struct sock *sk) void sctp_addr_wq_timeout_handler(unsigned long arg) { + struct net *net = (struct net *)arg; struct sctp_sockaddr_entry *addrw, *temp; struct sctp_sock *sp; - spin_lock_bh(&sctp_addr_wq_lock); + spin_lock_bh(&net->sctp.addr_wq_lock); - list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) { + list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) { SCTP_DEBUG_PRINTK_IPADDR("sctp_addrwq_timo_handler: the first ent in wq %p is ", - " for cmd %d at entry %p\n", &sctp_addr_waitq, &addrw->a, addrw->state, + " for cmd %d at entry %p\n", &net->sctp.addr_waitq, &addrw->a, addrw->state, addrw); #if IS_ENABLED(CONFIG_IPV6) @@ -648,7 +649,7 @@ void sctp_addr_wq_timeout_handler(unsigned long arg) goto free_next; in6 = (struct in6_addr *)&addrw->a.v6.sin6_addr; - if (ipv6_chk_addr(&init_net, in6, NULL, 0) == 0 && + if (ipv6_chk_addr(net, in6, NULL, 0) == 0 && addrw->state == SCTP_ADDR_NEW) { unsigned long timeo_val; @@ -656,12 +657,12 @@ void sctp_addr_wq_timeout_handler(unsigned long arg) SCTP_ADDRESS_TICK_DELAY); timeo_val = jiffies; timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY); - mod_timer(&sctp_addr_wq_timer, timeo_val); + mod_timer(&net->sctp.addr_wq_timer, timeo_val); break; } } #endif - list_for_each_entry(sp, &sctp_auto_asconf_splist, auto_asconf_list) { + list_for_each_entry(sp, &net->sctp.auto_asconf_splist, auto_asconf_list) { struct sock *sk; sk = sctp_opt2sk(sp); @@ -679,31 +680,32 @@ free_next: list_del(&addrw->list); kfree(addrw); } - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); } -static void sctp_free_addr_wq(void) +static void sctp_free_addr_wq(struct net *net) { struct sctp_sockaddr_entry *addrw; struct sctp_sockaddr_entry *temp; - spin_lock_bh(&sctp_addr_wq_lock); - del_timer(&sctp_addr_wq_timer); - list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) { + spin_lock_bh(&net->sctp.addr_wq_lock); + del_timer(&net->sctp.addr_wq_timer); + list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) { list_del(&addrw->list); kfree(addrw); } - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); } /* lookup the entry for the same address in the addr_waitq * sctp_addr_wq MUST be locked */ -static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entry *addr) +static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct net *net, + struct sctp_sockaddr_entry *addr) { struct sctp_sockaddr_entry *addrw; - list_for_each_entry(addrw, &sctp_addr_waitq, list) { + list_for_each_entry(addrw, &net->sctp.addr_waitq, list) { if (addrw->a.sa.sa_family != addr->a.sa.sa_family) continue; if (addrw->a.sa.sa_family == AF_INET) { @@ -719,7 +721,7 @@ static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entr return NULL; } -void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd) +void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cmd) { struct sctp_sockaddr_entry *addrw; unsigned long timeo_val; @@ -730,38 +732,38 @@ void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd) * new address after a couple of addition and deletion of that address */ - spin_lock_bh(&sctp_addr_wq_lock); + spin_lock_bh(&net->sctp.addr_wq_lock); /* Offsets existing events in addr_wq */ - addrw = sctp_addr_wq_lookup(addr); + addrw = sctp_addr_wq_lookup(net, addr); if (addrw) { if (addrw->state != cmd) { SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt offsets existing entry for %d ", " in wq %p\n", addrw->state, &addrw->a, - &sctp_addr_waitq); + &net->sctp.addr_waitq); list_del(&addrw->list); kfree(addrw); } - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); return; } /* OK, we have to add the new address to the wait queue */ addrw = kmemdup(addr, sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); if (addrw == NULL) { - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); return; } addrw->state = cmd; - list_add_tail(&addrw->list, &sctp_addr_waitq); + list_add_tail(&addrw->list, &net->sctp.addr_waitq); SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt add new entry for cmd:%d ", - " in wq %p\n", addrw->state, &addrw->a, &sctp_addr_waitq); + " in wq %p\n", addrw->state, &addrw->a, &net->sctp.addr_waitq); - if (!timer_pending(&sctp_addr_wq_timer)) { + if (!timer_pending(&net->sctp.addr_wq_timer)) { timeo_val = jiffies; timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY); - mod_timer(&sctp_addr_wq_timer, timeo_val); + mod_timer(&net->sctp.addr_wq_timer, timeo_val); } - spin_unlock_bh(&sctp_addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); } /* Event handler for inet address addition/deletion events. @@ -776,11 +778,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct sctp_sockaddr_entry *addr = NULL; struct sctp_sockaddr_entry *temp; + struct net *net = dev_net(ifa->ifa_dev->dev); int found = 0; - if (!net_eq(dev_net(ifa->ifa_dev->dev), &init_net)) - return NOTIFY_DONE; - switch (ev) { case NETDEV_UP: addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); @@ -789,27 +789,27 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; addr->valid = 1; - spin_lock_bh(&sctp_local_addr_lock); - list_add_tail_rcu(&addr->list, &sctp_local_addr_list); - sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW); - spin_unlock_bh(&sctp_local_addr_lock); + spin_lock_bh(&net->sctp.local_addr_lock); + list_add_tail_rcu(&addr->list, &net->sctp.local_addr_list); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_NEW); + spin_unlock_bh(&net->sctp.local_addr_lock); } break; case NETDEV_DOWN: - spin_lock_bh(&sctp_local_addr_lock); + spin_lock_bh(&net->sctp.local_addr_lock); list_for_each_entry_safe(addr, temp, - &sctp_local_addr_list, list) { + &net->sctp.local_addr_list, list) { if (addr->a.sa.sa_family == AF_INET && addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { - sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); break; } } - spin_unlock_bh(&sctp_local_addr_lock); + spin_unlock_bh(&net->sctp.local_addr_lock); if (found) kfree_rcu(addr, rcu); break; @@ -1194,6 +1194,36 @@ static void sctp_v4_del_protocol(void) unregister_inetaddr_notifier(&sctp_inetaddr_notifier); } +static int sctp_net_init(struct net *net) +{ + /* Initialize the local address list. */ + INIT_LIST_HEAD(&net->sctp.local_addr_list); + spin_lock_init(&net->sctp.local_addr_lock); + sctp_get_local_addr_list(net); + + /* Initialize the address event list */ + INIT_LIST_HEAD(&net->sctp.addr_waitq); + INIT_LIST_HEAD(&net->sctp.auto_asconf_splist); + spin_lock_init(&net->sctp.addr_wq_lock); + net->sctp.addr_wq_timer.expires = 0; + setup_timer(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler, + (unsigned long)net); + + return 0; +} + +static void sctp_net_exit(struct net *net) +{ + /* Free the local address list */ + sctp_free_addr_wq(net); + sctp_free_local_addr_list(net); +} + +static struct pernet_operations sctp_net_ops = { + .init = sctp_net_init, + .exit = sctp_net_exit, +}; + /* Initialize the universe into something sensible. */ SCTP_STATIC __init int sctp_init(void) { @@ -1399,18 +1429,6 @@ SCTP_STATIC __init int sctp_init(void) sctp_v4_pf_init(); sctp_v6_pf_init(); - /* Initialize the local address list. */ - INIT_LIST_HEAD(&sctp_local_addr_list); - spin_lock_init(&sctp_local_addr_lock); - sctp_get_local_addr_list(); - - /* Initialize the address event list */ - INIT_LIST_HEAD(&sctp_addr_waitq); - INIT_LIST_HEAD(&sctp_auto_asconf_splist); - spin_lock_init(&sctp_addr_wq_lock); - sctp_addr_wq_timer.expires = 0; - setup_timer(&sctp_addr_wq_timer, sctp_addr_wq_timeout_handler, 0); - status = sctp_v4_protosw_init(); if (status) @@ -1426,6 +1444,10 @@ SCTP_STATIC __init int sctp_init(void) goto err_ctl_sock_init; } + status = register_pernet_subsys(&sctp_net_ops); + if (status) + goto err_register_pernet_subsys; + status = sctp_v4_add_protocol(); if (status) goto err_add_protocol; @@ -1441,13 +1463,14 @@ out: err_v6_add_protocol: sctp_v4_del_protocol(); err_add_protocol: + unregister_pernet_subsys(&sctp_net_ops); +err_register_pernet_subsys: inet_ctl_sock_destroy(sctp_ctl_sock); err_ctl_sock_init: sctp_v6_protosw_exit(); err_v6_protosw_init: sctp_v4_protosw_exit(); err_protosw_init: - sctp_free_local_addr_list(); sctp_v4_pf_exit(); sctp_v6_pf_exit(); sctp_sysctl_unregister(); @@ -1482,18 +1505,16 @@ SCTP_STATIC __exit void sctp_exit(void) /* Unregister with inet6/inet layers. */ sctp_v6_del_protocol(); sctp_v4_del_protocol(); - sctp_free_addr_wq(); /* Free the control endpoint. */ inet_ctl_sock_destroy(sctp_ctl_sock); + unregister_pernet_subsys(&sctp_net_ops); + /* Free protosw registrations */ sctp_v6_protosw_exit(); sctp_v4_protosw_exit(); - /* Free the local address list. */ - sctp_free_local_addr_list(); - /* Unregister with socket layer. */ sctp_v6_pf_exit(); sctp_v4_pf_exit(); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4316b0f988d4..5b6dd0e3d1f6 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3471,7 +3471,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval, sp->do_auto_asconf = 0; } else if (val && !sp->do_auto_asconf) { list_add_tail(&sp->auto_asconf_list, - &sctp_auto_asconf_splist); + &sock_net(sk)->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; } return 0; @@ -3964,7 +3964,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); if (sctp_default_auto_asconf) { list_add_tail(&sp->auto_asconf_list, - &sctp_auto_asconf_splist); + &sock_net(sk)->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; } else sp->do_auto_asconf = 0; @@ -4653,9 +4653,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, union sctp_addr temp; int cnt = 0; int addrlen; + struct net *net = sock_net(sk); rcu_read_lock(); - list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) { if (!addr->valid) continue; -- cgit v1.2.3 From 2ce955035081112cf1590c961da8d94324142b5e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 6 Aug 2012 08:43:06 +0000 Subject: sctp: Make the ctl_sock per network namespace - Kill sctp_get_ctl_sock, it is useless now. - Pass struct net where needed so net->sctp.ctl_sock is accessible. Signed-off-by: "Eric W. Biederman" Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/netns/sctp.h | 8 ++++++++ include/net/sctp/sctp.h | 1 - net/sctp/input.c | 4 ++-- net/sctp/protocol.c | 47 +++++++++++++++++++---------------------------- net/sctp/sm_statefuns.c | 45 +++++++++++++++++++++++++++++++-------------- 5 files changed, 60 insertions(+), 45 deletions(-) (limited to 'include/net/netns') diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index cbd684e01cf7..29e36b4b4feb 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -1,7 +1,15 @@ #ifndef __NETNS_SCTP_H__ #define __NETNS_SCTP_H__ +struct sock; + struct netns_sctp { + /* This is the global socket data structure used for responding to + * the Out-of-the-blue (OOTB) packets. A control sock will be created + * for this socket at the initialization time. + */ + struct sock *ctl_sock; + /* This is the global local address list. * We actively maintain this complete list of addresses on * the system by catching address add/delete events. diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 00c920537d4a..550a81bbfc71 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -114,7 +114,6 @@ /* * sctp/protocol.c */ -extern struct sock *sctp_get_ctl_sock(void); extern int sctp_copy_local_addr_list(struct net *, struct sctp_bind_addr *, sctp_scope_t, gfp_t gfp, int flags); diff --git a/net/sctp/input.c b/net/sctp/input.c index a7e9a85b5acb..c9a0449bde53 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -204,7 +204,7 @@ int sctp_rcv(struct sk_buff *skb) sctp_endpoint_put(ep); ep = NULL; } - sk = sctp_get_ctl_sock(); + sk = net->sctp.ctl_sock; ep = sctp_sk(sk)->ep; sctp_endpoint_hold(ep); rcvr = &ep->base; @@ -795,7 +795,7 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net, goto hit; } - ep = sctp_sk((sctp_get_ctl_sock()))->ep; + ep = sctp_sk(net->sctp.ctl_sock)->ep; hit: sctp_endpoint_hold(ep); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 7025d96bae5f..f20bd708e89c 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -78,12 +78,6 @@ struct proc_dir_entry *proc_net_sctp; struct idr sctp_assocs_id; DEFINE_SPINLOCK(sctp_assocs_id_lock); -/* This is the global socket data structure used for responding to - * the Out-of-the-blue (OOTB) packets. A control sock will be created - * for this socket at the initialization time. - */ -static struct sock *sctp_ctl_sock; - static struct sctp_pf *sctp_pf_inet6_specific; static struct sctp_pf *sctp_pf_inet_specific; static struct sctp_af *sctp_af_v4_specific; @@ -96,12 +90,6 @@ long sysctl_sctp_mem[3]; int sysctl_sctp_rmem[3]; int sysctl_sctp_wmem[3]; -/* Return the address of the control sock. */ -struct sock *sctp_get_ctl_sock(void) -{ - return sctp_ctl_sock; -} - /* Set up the proc fs entry for the SCTP protocol. */ static __init int sctp_proc_init(void) { @@ -822,7 +810,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, * Initialize the control inode/socket with a control endpoint data * structure. This endpoint is reserved exclusively for the OOTB processing. */ -static int sctp_ctl_sock_init(void) +static int sctp_ctl_sock_init(struct net *net) { int err; sa_family_t family = PF_INET; @@ -830,14 +818,14 @@ static int sctp_ctl_sock_init(void) if (sctp_get_pf_specific(PF_INET6)) family = PF_INET6; - err = inet_ctl_sock_create(&sctp_ctl_sock, family, - SOCK_SEQPACKET, IPPROTO_SCTP, &init_net); + err = inet_ctl_sock_create(&net->sctp.ctl_sock, family, + SOCK_SEQPACKET, IPPROTO_SCTP, net); /* If IPv6 socket could not be created, try the IPv4 socket */ if (err < 0 && family == PF_INET6) - err = inet_ctl_sock_create(&sctp_ctl_sock, AF_INET, + err = inet_ctl_sock_create(&net->sctp.ctl_sock, AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP, - &init_net); + net); if (err < 0) { pr_err("Failed to create the SCTP control socket\n"); @@ -1196,6 +1184,14 @@ static void sctp_v4_del_protocol(void) static int sctp_net_init(struct net *net) { + int status; + + /* Initialize the control inode/socket for handling OOTB packets. */ + if ((status = sctp_ctl_sock_init(net))) { + pr_err("Failed to initialize the SCTP control sock\n"); + goto err_ctl_sock_init; + } + /* Initialize the local address list. */ INIT_LIST_HEAD(&net->sctp.local_addr_list); spin_lock_init(&net->sctp.local_addr_lock); @@ -1210,6 +1206,9 @@ static int sctp_net_init(struct net *net) (unsigned long)net); return 0; + +err_ctl_sock_init: + return status; } static void sctp_net_exit(struct net *net) @@ -1217,6 +1216,9 @@ static void sctp_net_exit(struct net *net) /* Free the local address list */ sctp_free_addr_wq(net); sctp_free_local_addr_list(net); + + /* Free the control endpoint. */ + inet_ctl_sock_destroy(net->sctp.ctl_sock); } static struct pernet_operations sctp_net_ops = { @@ -1438,12 +1440,6 @@ SCTP_STATIC __init int sctp_init(void) if (status) goto err_v6_protosw_init; - /* Initialize the control inode/socket for handling OOTB packets. */ - if ((status = sctp_ctl_sock_init())) { - pr_err("Failed to initialize the SCTP control sock\n"); - goto err_ctl_sock_init; - } - status = register_pernet_subsys(&sctp_net_ops); if (status) goto err_register_pernet_subsys; @@ -1465,8 +1461,6 @@ err_v6_add_protocol: err_add_protocol: unregister_pernet_subsys(&sctp_net_ops); err_register_pernet_subsys: - inet_ctl_sock_destroy(sctp_ctl_sock); -err_ctl_sock_init: sctp_v6_protosw_exit(); err_v6_protosw_init: sctp_v4_protosw_exit(); @@ -1506,9 +1500,6 @@ SCTP_STATIC __exit void sctp_exit(void) sctp_v6_del_protocol(); sctp_v4_del_protocol(); - /* Free the control endpoint. */ - inet_ctl_sock_destroy(sctp_ctl_sock); - unregister_pernet_subsys(&sctp_net_ops); /* Free protosw registrations */ diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 9fca10357350..f2daf615e8fe 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -74,7 +74,8 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, static int sctp_eat_data(const struct sctp_association *asoc, struct sctp_chunk *chunk, sctp_cmd_seq_t *commands); -static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc, +static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, + const struct sctp_association *asoc, const struct sctp_chunk *chunk); static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep, const struct sctp_association *asoc, @@ -301,6 +302,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, struct sctp_chunk *err_chunk; struct sctp_packet *packet; sctp_unrecognized_param_t *unk_param; + struct net *net; int len; /* 6.10 Bundling @@ -318,7 +320,8 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) { + net = sock_net(ep->base.sk); + if (ep == sctp_sk(net->sctp.ctl_sock)->ep) { SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); } @@ -646,11 +649,13 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, int error = 0; struct sctp_chunk *err_chk_p; struct sock *sk; + struct net *net; /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) { + net = sock_net(ep->base.sk); + if (ep == sctp_sk(net->sctp.ctl_sock)->ep) { SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); } @@ -1171,7 +1176,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, /* Helper function to send out an abort for the restart * condition. */ -static int sctp_sf_send_restart_abort(union sctp_addr *ssa, +static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa, struct sctp_chunk *init, sctp_cmd_seq_t *commands) { @@ -1197,7 +1202,7 @@ static int sctp_sf_send_restart_abort(union sctp_addr *ssa, errhdr->length = htons(len); /* Assign to the control socket. */ - ep = sctp_sk((sctp_get_ctl_sock()))->ep; + ep = sctp_sk(net->sctp.ctl_sock)->ep; /* Association is NULL since this may be a restart attack and we * want to send back the attacker's vtag. @@ -1240,6 +1245,7 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, struct sctp_chunk *init, sctp_cmd_seq_t *commands) { + struct net *net = sock_net(new_asoc->base.sk); struct sctp_transport *new_addr; int ret = 1; @@ -1258,7 +1264,7 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, transports) { if (!list_has_sctp_addr(&asoc->peer.transport_addr_list, &new_addr->ipaddr)) { - sctp_sf_send_restart_abort(&new_addr->ipaddr, init, + sctp_sf_send_restart_abort(net, &new_addr->ipaddr, init, commands); ret = 0; break; @@ -1650,10 +1656,11 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep, const sctp_subtype_t type, void *arg, sctp_cmd_seq_t *commands) { + struct net *net = sock_net(ep->base.sk); /* Per the above section, we'll discard the chunk if we have an * endpoint. If this is an OOTB INIT-ACK, treat it as such. */ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) + if (ep == sctp_sk(net->sctp.ctl_sock)->ep) return sctp_sf_ootb(ep, asoc, type, arg, commands); else return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); @@ -3163,8 +3170,10 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, struct sctp_packet *packet = NULL; struct sctp_chunk *chunk = arg; struct sctp_chunk *abort; + struct net *net; - packet = sctp_ootb_pkt_new(asoc, chunk); + net = sock_net(ep->base.sk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { /* Make an ABORT. The T bit will be set if the asoc @@ -3425,8 +3434,10 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, struct sctp_packet *packet = NULL; struct sctp_chunk *chunk = arg; struct sctp_chunk *shut; + struct net *net; - packet = sctp_ootb_pkt_new(asoc, chunk); + net = sock_net(ep->base.sk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { /* Make an SHUTDOWN_COMPLETE. @@ -4262,6 +4273,7 @@ static sctp_disposition_t sctp_sf_abort_violation( struct sctp_packet *packet = NULL; struct sctp_chunk *chunk = arg; struct sctp_chunk *abort = NULL; + struct net *net; /* SCTP-AUTH, Section 6.3: * It should be noted that if the receiver wants to tear @@ -4282,6 +4294,7 @@ static sctp_disposition_t sctp_sf_abort_violation( if (!abort) goto nomem; + net = sock_net(ep->base.sk); if (asoc) { /* Treat INIT-ACK as a special case during COOKIE-WAIT. */ if (chunk->chunk_hdr->type == SCTP_CID_INIT_ACK && @@ -4319,7 +4332,7 @@ static sctp_disposition_t sctp_sf_abort_violation( SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); } } else { - packet = sctp_ootb_pkt_new(asoc, chunk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (!packet) goto nomem_pkt; @@ -5825,8 +5838,10 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, { struct sctp_packet *packet; struct sctp_chunk *abort; + struct net *net; - packet = sctp_ootb_pkt_new(asoc, chunk); + net = sock_net(ep->base.sk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { /* Make an ABORT. @@ -5858,7 +5873,8 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep, } /* Allocate a packet for responding in the OOTB conditions. */ -static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc, +static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, + const struct sctp_association *asoc, const struct sctp_chunk *chunk) { struct sctp_packet *packet; @@ -5919,7 +5935,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc * the source address. */ sctp_transport_route(transport, (union sctp_addr *)&chunk->dest, - sctp_sk(sctp_get_ctl_sock())); + sctp_sk(net->sctp.ctl_sock)); packet = sctp_packet_init(&transport->packet, transport, sport, dport); packet = sctp_packet_config(packet, vtag, 0); @@ -5946,7 +5962,8 @@ static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep, struct sctp_packet *packet; if (err_chunk) { - packet = sctp_ootb_pkt_new(asoc, chunk); + struct net *net = sock_net(ep->base.sk); + packet = sctp_ootb_pkt_new(net, asoc, chunk); if (packet) { struct sctp_signed_cookie *cookie; -- cgit v1.2.3 From 13d782f6b4fbbaf9d0380a9947deb45a9de46ae7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 6 Aug 2012 08:45:15 +0000 Subject: sctp: Make the proc files per network namespace. - Convert all of the files under /proc/net/sctp to be per network namespace. - Don't print anything for /proc/net/sctp/snmp except in the initial network namespaces as the snmp counters still have to be converted to be per network namespace. Signed-off-by: "Eric W. Biederman" Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/netns/sctp.h | 5 +++ include/net/sctp/sctp.h | 25 +++++++------- net/sctp/objcnt.c | 8 ++--- net/sctp/proc.c | 59 +++++++++++++++++++++----------- net/sctp/protocol.c | 87 +++++++++++++++++++++--------------------------- 5 files changed, 98 insertions(+), 86 deletions(-) (limited to 'include/net/netns') diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 29e36b4b4feb..9c20a82a77ec 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -2,8 +2,13 @@ #define __NETNS_SCTP_H__ struct sock; +struct proc_dir_entry; struct netns_sctp { +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc_net_sctp; +#endif + /* This is the global socket data structure used for responding to * the Out-of-the-blue (OOTB) packets. A control sock will be created * for this socket at the initialization time. diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 550a81bbfc71..b49588a51d80 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -172,14 +172,14 @@ void sctp_backlog_migrate(struct sctp_association *assoc, /* * sctp/proc.c */ -int sctp_snmp_proc_init(void); -void sctp_snmp_proc_exit(void); -int sctp_eps_proc_init(void); -void sctp_eps_proc_exit(void); -int sctp_assocs_proc_init(void); -void sctp_assocs_proc_exit(void); -int sctp_remaddr_proc_init(void); -void sctp_remaddr_proc_exit(void); +int sctp_snmp_proc_init(struct net *net); +void sctp_snmp_proc_exit(struct net *net); +int sctp_eps_proc_init(struct net *net); +void sctp_eps_proc_exit(struct net *net); +int sctp_assocs_proc_init(struct net *net); +void sctp_assocs_proc_exit(struct net *net); +int sctp_remaddr_proc_init(struct net *net); +void sctp_remaddr_proc_exit(struct net *net); /* @@ -360,16 +360,16 @@ atomic_t sctp_dbg_objcnt_## name = ATOMIC_INIT(0) #define SCTP_DBG_OBJCNT_ENTRY(name) \ {.label= #name, .counter= &sctp_dbg_objcnt_## name} -void sctp_dbg_objcnt_init(void); -void sctp_dbg_objcnt_exit(void); +void sctp_dbg_objcnt_init(struct net *); +void sctp_dbg_objcnt_exit(struct net *); #else #define SCTP_DBG_OBJCNT_INC(name) #define SCTP_DBG_OBJCNT_DEC(name) -static inline void sctp_dbg_objcnt_init(void) { return; } -static inline void sctp_dbg_objcnt_exit(void) { return; } +static inline void sctp_dbg_objcnt_init(struct net *) { return; } +static inline void sctp_dbg_objcnt_exit(struct net *) { return; } #endif /* CONFIG_SCTP_DBG_OBJCOUNT */ @@ -585,7 +585,6 @@ for (pos = chunk->subh.fwdtsn_hdr->skip;\ extern struct proto sctp_prot; extern struct proto sctpv6_prot; -extern struct proc_dir_entry *proc_net_sctp; void sctp_put_port(struct sock *sk); extern struct idr sctp_assocs_id; diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index 8ef8e7d9eb61..fe012c44f8df 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -129,20 +129,20 @@ static const struct file_operations sctp_objcnt_ops = { }; /* Initialize the objcount in the proc filesystem. */ -void sctp_dbg_objcnt_init(void) +void sctp_dbg_objcnt_init(struct net *net) { struct proc_dir_entry *ent; ent = proc_create("sctp_dbg_objcnt", 0, - proc_net_sctp, &sctp_objcnt_ops); + net->sctp.proc_net_sctp, &sctp_objcnt_ops); if (!ent) pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n"); } /* Cleanup the objcount entry in the proc filesystem. */ -void sctp_dbg_objcnt_exit(void) +void sctp_dbg_objcnt_exit(struct net *net) { - remove_proc_entry("sctp_dbg_objcnt", proc_net_sctp); + remove_proc_entry("sctp_dbg_objcnt", net->sctp.proc_net_sctp); } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 1e2eee88c3ea..dc79a3aa0382 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -80,8 +80,12 @@ static const struct snmp_mib sctp_snmp_list[] = { /* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) { + struct net *net = seq->private; int i; + if (!net_eq(net, &init_net)) + return 0; + for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, snmp_fold_field((void __percpu **)sctp_statistics, @@ -93,7 +97,7 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) /* Initialize the seq file operations for 'snmp' object. */ static int sctp_snmp_seq_open(struct inode *inode, struct file *file) { - return single_open(file, sctp_snmp_seq_show, NULL); + return single_open_net(inode, file, sctp_snmp_seq_show); } static const struct file_operations sctp_snmp_seq_fops = { @@ -105,11 +109,12 @@ static const struct file_operations sctp_snmp_seq_fops = { }; /* Set up the proc fs entry for 'snmp' object. */ -int __init sctp_snmp_proc_init(void) +int __net_init sctp_snmp_proc_init(struct net *net) { struct proc_dir_entry *p; - p = proc_create("snmp", S_IRUGO, proc_net_sctp, &sctp_snmp_seq_fops); + p = proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_snmp_seq_fops); if (!p) return -ENOMEM; @@ -117,9 +122,9 @@ int __init sctp_snmp_proc_init(void) } /* Cleanup the proc fs entry for 'snmp' object. */ -void sctp_snmp_proc_exit(void) +void sctp_snmp_proc_exit(struct net *net) { - remove_proc_entry("snmp", proc_net_sctp); + remove_proc_entry("snmp", net->sctp.proc_net_sctp); } /* Dump local addresses of an association/endpoint. */ @@ -197,6 +202,7 @@ static void * sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* Display sctp endpoints (/proc/net/sctp/eps). */ static int sctp_eps_seq_show(struct seq_file *seq, void *v) { + struct seq_net_private *priv = seq->private; struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_endpoint *ep; @@ -213,6 +219,8 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) sctp_for_each_hentry(epb, node, &head->chain) { ep = sctp_ep(epb); sk = epb->sk; + if (!net_eq(sock_net(sk), priv->net)) + continue; seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk, sctp_sk(sk)->type, sk->sk_state, hash, epb->bind_addr.port, @@ -238,7 +246,8 @@ static const struct seq_operations sctp_eps_ops = { /* Initialize the seq file operations for 'eps' object. */ static int sctp_eps_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &sctp_eps_ops); + return seq_open_net(inode, file, &sctp_eps_ops, + sizeof(struct seq_net_private)); } static const struct file_operations sctp_eps_seq_fops = { @@ -249,11 +258,12 @@ static const struct file_operations sctp_eps_seq_fops = { }; /* Set up the proc fs entry for 'eps' object. */ -int __init sctp_eps_proc_init(void) +int __net_init sctp_eps_proc_init(struct net *net) { struct proc_dir_entry *p; - p = proc_create("eps", S_IRUGO, proc_net_sctp, &sctp_eps_seq_fops); + p = proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_eps_seq_fops); if (!p) return -ENOMEM; @@ -261,9 +271,9 @@ int __init sctp_eps_proc_init(void) } /* Cleanup the proc fs entry for 'eps' object. */ -void sctp_eps_proc_exit(void) +void sctp_eps_proc_exit(struct net *net) { - remove_proc_entry("eps", proc_net_sctp); + remove_proc_entry("eps", net->sctp.proc_net_sctp); } @@ -300,6 +310,7 @@ static void * sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos) /* Display sctp associations (/proc/net/sctp/assocs). */ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) { + struct seq_net_private *priv = seq->private; struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_association *assoc; @@ -316,6 +327,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) sctp_for_each_hentry(epb, node, &head->chain) { assoc = sctp_assoc(epb); sk = epb->sk; + if (!net_eq(sock_net(sk), priv->net)) + continue; seq_printf(seq, "%8pK %8pK %-3d %-3d %-2d %-4d " "%4d %8d %8d %7d %5lu %-5d %5d ", @@ -354,7 +367,8 @@ static const struct seq_operations sctp_assoc_ops = { /* Initialize the seq file operations for 'assocs' object. */ static int sctp_assocs_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &sctp_assoc_ops); + return seq_open_net(inode, file, &sctp_assoc_ops, + sizeof(struct seq_net_private)); } static const struct file_operations sctp_assocs_seq_fops = { @@ -365,11 +379,11 @@ static const struct file_operations sctp_assocs_seq_fops = { }; /* Set up the proc fs entry for 'assocs' object. */ -int __init sctp_assocs_proc_init(void) +int __net_init sctp_assocs_proc_init(struct net *net) { struct proc_dir_entry *p; - p = proc_create("assocs", S_IRUGO, proc_net_sctp, + p = proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp, &sctp_assocs_seq_fops); if (!p) return -ENOMEM; @@ -378,9 +392,9 @@ int __init sctp_assocs_proc_init(void) } /* Cleanup the proc fs entry for 'assocs' object. */ -void sctp_assocs_proc_exit(void) +void sctp_assocs_proc_exit(struct net *net) { - remove_proc_entry("assocs", proc_net_sctp); + remove_proc_entry("assocs", net->sctp.proc_net_sctp); } static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos) @@ -412,6 +426,7 @@ static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v) static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) { + struct seq_net_private *priv = seq->private; struct sctp_hashbucket *head; struct sctp_ep_common *epb; struct sctp_association *assoc; @@ -426,6 +441,8 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) sctp_local_bh_disable(); read_lock(&head->lock); sctp_for_each_hentry(epb, node, &head->chain) { + if (!net_eq(sock_net(epb->sk), priv->net)) + continue; assoc = sctp_assoc(epb); list_for_each_entry(tsp, &assoc->peer.transport_addr_list, transports) { @@ -489,14 +506,15 @@ static const struct seq_operations sctp_remaddr_ops = { }; /* Cleanup the proc fs entry for 'remaddr' object. */ -void sctp_remaddr_proc_exit(void) +void sctp_remaddr_proc_exit(struct net *net) { - remove_proc_entry("remaddr", proc_net_sctp); + remove_proc_entry("remaddr", net->sctp.proc_net_sctp); } static int sctp_remaddr_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &sctp_remaddr_ops); + return seq_open_net(inode, file, &sctp_remaddr_ops, + sizeof(struct seq_net_private)); } static const struct file_operations sctp_remaddr_seq_fops = { @@ -506,11 +524,12 @@ static const struct file_operations sctp_remaddr_seq_fops = { .release = seq_release, }; -int __init sctp_remaddr_proc_init(void) +int __net_init sctp_remaddr_proc_init(struct net *net) { struct proc_dir_entry *p; - p = proc_create("remaddr", S_IRUGO, proc_net_sctp, &sctp_remaddr_seq_fops); + p = proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp, + &sctp_remaddr_seq_fops); if (!p) return -ENOMEM; return 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 48a5989c98ce..de7994edb4ca 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -71,10 +71,6 @@ struct sctp_globals sctp_globals __read_mostly; DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly; -#ifdef CONFIG_PROC_FS -struct proc_dir_entry *proc_net_sctp; -#endif - struct idr sctp_assocs_id; DEFINE_SPINLOCK(sctp_assocs_id_lock); @@ -91,60 +87,52 @@ int sysctl_sctp_rmem[3]; int sysctl_sctp_wmem[3]; /* Set up the proc fs entry for the SCTP protocol. */ -static __init int sctp_proc_init(void) +static __net_init int sctp_proc_init(struct net *net) { #ifdef CONFIG_PROC_FS - if (!proc_net_sctp) { - proc_net_sctp = proc_mkdir("sctp", init_net.proc_net); - if (!proc_net_sctp) - goto out_free_percpu; - } - - if (sctp_snmp_proc_init()) + net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net); + if (!net->sctp.proc_net_sctp) + goto out_proc_net_sctp; + if (sctp_snmp_proc_init(net)) goto out_snmp_proc_init; - if (sctp_eps_proc_init()) + if (sctp_eps_proc_init(net)) goto out_eps_proc_init; - if (sctp_assocs_proc_init()) + if (sctp_assocs_proc_init(net)) goto out_assocs_proc_init; - if (sctp_remaddr_proc_init()) + if (sctp_remaddr_proc_init(net)) goto out_remaddr_proc_init; return 0; out_remaddr_proc_init: - sctp_assocs_proc_exit(); + sctp_assocs_proc_exit(net); out_assocs_proc_init: - sctp_eps_proc_exit(); + sctp_eps_proc_exit(net); out_eps_proc_init: - sctp_snmp_proc_exit(); + sctp_snmp_proc_exit(net); out_snmp_proc_init: - if (proc_net_sctp) { - proc_net_sctp = NULL; - remove_proc_entry("sctp", init_net.proc_net); - } -out_free_percpu: -#else - return 0; -#endif /* CONFIG_PROC_FS */ + remove_proc_entry("sctp", net->proc_net); + net->sctp.proc_net_sctp = NULL; +out_proc_net_sctp: return -ENOMEM; +#endif /* CONFIG_PROC_FS */ + return 0; } /* Clean up the proc fs entry for the SCTP protocol. * Note: Do not make this __exit as it is used in the init error * path. */ -static void sctp_proc_exit(void) +static void sctp_proc_exit(struct net *net) { #ifdef CONFIG_PROC_FS - sctp_snmp_proc_exit(); - sctp_eps_proc_exit(); - sctp_assocs_proc_exit(); - sctp_remaddr_proc_exit(); - - if (proc_net_sctp) { - proc_net_sctp = NULL; - remove_proc_entry("sctp", init_net.proc_net); - } + sctp_snmp_proc_exit(net); + sctp_eps_proc_exit(net); + sctp_assocs_proc_exit(net); + sctp_remaddr_proc_exit(net); + + remove_proc_entry("sctp", net->proc_net); + net->sctp.proc_net_sctp = NULL; #endif } @@ -1180,6 +1168,13 @@ static int sctp_net_init(struct net *net) { int status; + /* Initialize proc fs directory. */ + status = sctp_proc_init(net); + if (status) + goto err_init_proc; + + sctp_dbg_objcnt_init(net); + /* Initialize the control inode/socket for handling OOTB packets. */ if ((status = sctp_ctl_sock_init(net))) { pr_err("Failed to initialize the SCTP control sock\n"); @@ -1202,6 +1197,9 @@ static int sctp_net_init(struct net *net) return 0; err_ctl_sock_init: + sctp_dbg_objcnt_exit(net); + sctp_proc_exit(net); +err_init_proc: return status; } @@ -1213,6 +1211,10 @@ static void sctp_net_exit(struct net *net) /* Free the control endpoint. */ inet_ctl_sock_destroy(net->sctp.ctl_sock); + + sctp_dbg_objcnt_exit(net); + + sctp_proc_exit(net); } static struct pernet_operations sctp_net_ops = { @@ -1259,14 +1261,6 @@ SCTP_STATIC __init int sctp_init(void) if (status) goto err_percpu_counter_init; - /* Initialize proc fs directory. */ - status = sctp_proc_init(); - if (status) - goto err_init_proc; - - /* Initialize object count debugging. */ - sctp_dbg_objcnt_init(); - /* * 14. Suggested SCTP Protocol Parameter Values */ @@ -1476,9 +1470,6 @@ err_ehash_alloc: get_order(sctp_assoc_hashsize * sizeof(struct sctp_hashbucket))); err_ahash_alloc: - sctp_dbg_objcnt_exit(); - sctp_proc_exit(); -err_init_proc: percpu_counter_destroy(&sctp_sockets_allocated); err_percpu_counter_init: cleanup_sctp_mibs(); @@ -1520,9 +1511,7 @@ SCTP_STATIC __exit void sctp_exit(void) get_order(sctp_port_hashsize * sizeof(struct sctp_bind_hashbucket))); - sctp_dbg_objcnt_exit(); percpu_counter_destroy(&sctp_sockets_allocated); - sctp_proc_exit(); cleanup_sctp_mibs(); rcu_barrier(); /* Wait for completion of call_rcu()'s */ -- cgit v1.2.3 From b01a24078fa3fc4f0f447d1306ce5adc495ead86 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 6 Aug 2012 08:47:55 +0000 Subject: sctp: Make the mib per network namespace Signed-off-by: "Eric W. Biederman" Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/netns/sctp.h | 3 + include/net/sctp/sctp.h | 9 ++- net/sctp/associola.c | 2 +- net/sctp/chunk.c | 2 +- net/sctp/endpointola.c | 2 +- net/sctp/input.c | 22 +++---- net/sctp/ipv6.c | 4 +- net/sctp/output.c | 2 +- net/sctp/outqueue.c | 18 +++--- net/sctp/proc.c | 5 +- net/sctp/protocol.c | 27 ++++---- net/sctp/sm_statefuns.c | 163 ++++++++++++++++++++++++++++------------------- net/sctp/ulpqueue.c | 18 ++++-- 13 files changed, 158 insertions(+), 119 deletions(-) (limited to 'include/net/netns') diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 9c20a82a77ec..06ccddf9566c 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -3,8 +3,11 @@ struct sock; struct proc_dir_entry; +struct sctp_mib; struct netns_sctp { + DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics); + #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_net_sctp; #endif diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index b49588a51d80..7dcd4dfd7c3f 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -221,11 +221,10 @@ extern struct kmem_cache *sctp_bucket_cachep __read_mostly; #define sctp_bh_unlock_sock(sk) bh_unlock_sock(sk) /* SCTP SNMP MIB stats handlers */ -DECLARE_SNMP_STAT(struct sctp_mib, sctp_statistics); -#define SCTP_INC_STATS(field) SNMP_INC_STATS(sctp_statistics, field) -#define SCTP_INC_STATS_BH(field) SNMP_INC_STATS_BH(sctp_statistics, field) -#define SCTP_INC_STATS_USER(field) SNMP_INC_STATS_USER(sctp_statistics, field) -#define SCTP_DEC_STATS(field) SNMP_DEC_STATS(sctp_statistics, field) +#define SCTP_INC_STATS(net, field) SNMP_INC_STATS((net)->sctp.sctp_statistics, field) +#define SCTP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->sctp.sctp_statistics, field) +#define SCTP_INC_STATS_USER(net, field) SNMP_INC_STATS_USER((net)->sctp.sctp_statistics, field) +#define SCTP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->sctp.sctp_statistics, field) #endif /* !TEST_FRAME */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index ed4930b31341..8a1f27a7a001 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1150,7 +1150,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) if (sctp_chunk_is_data(chunk)) asoc->peer.last_data_from = chunk->transport; else - SCTP_INC_STATS(SCTP_MIB_INCTRLCHUNKS); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_INCTRLCHUNKS); if (chunk->transport) chunk->transport->last_time_heard = jiffies; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 6c8556459a75..7c2df9c33df3 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -257,7 +257,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, offset = 0; if ((whole > 1) || (whole && over)) - SCTP_INC_STATS_USER(SCTP_MIB_FRAGUSRMSGS); + SCTP_INC_STATS_USER(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS); /* Create chunks for all the full sized DATA chunks. */ for (i=0, len=first_len; i < whole; i++) { diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 6b763939345b..3edca80ab3a1 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -478,7 +478,7 @@ normal: if (asoc && sctp_chunk_is_data(chunk)) asoc->peer.last_data_from = chunk->transport; else - SCTP_INC_STATS(SCTP_MIB_INCTRLCHUNKS); + SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_INCTRLCHUNKS); if (chunk->transport) chunk->transport->last_time_heard = jiffies; diff --git a/net/sctp/input.c b/net/sctp/input.c index c9a0449bde53..530830151f04 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -83,7 +83,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb); /* Calculate the SCTP checksum of an SCTP packet. */ -static inline int sctp_rcv_checksum(struct sk_buff *skb) +static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb) { struct sctphdr *sh = sctp_hdr(skb); __le32 cmp = sh->checksum; @@ -99,7 +99,7 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb) if (val != cmp) { /* CRC failure, dump it. */ - SCTP_INC_STATS_BH(SCTP_MIB_CHECKSUMERRORS); + SCTP_INC_STATS_BH(net, SCTP_MIB_CHECKSUMERRORS); return -1; } return 0; @@ -137,7 +137,7 @@ int sctp_rcv(struct sk_buff *skb) if (skb->pkt_type!=PACKET_HOST) goto discard_it; - SCTP_INC_STATS_BH(SCTP_MIB_INSCTPPACKS); + SCTP_INC_STATS_BH(net, SCTP_MIB_INSCTPPACKS); if (skb_linearize(skb)) goto discard_it; @@ -149,7 +149,7 @@ int sctp_rcv(struct sk_buff *skb) if (skb->len < sizeof(struct sctphdr)) goto discard_it; if (!sctp_checksum_disable && !skb_csum_unnecessary(skb) && - sctp_rcv_checksum(skb) < 0) + sctp_rcv_checksum(net, skb) < 0) goto discard_it; skb_pull(skb, sizeof(struct sctphdr)); @@ -220,7 +220,7 @@ int sctp_rcv(struct sk_buff *skb) */ if (!asoc) { if (sctp_rcv_ootb(skb)) { - SCTP_INC_STATS_BH(SCTP_MIB_OUTOFBLUES); + SCTP_INC_STATS_BH(net, SCTP_MIB_OUTOFBLUES); goto discard_release; } } @@ -276,9 +276,9 @@ int sctp_rcv(struct sk_buff *skb) skb = NULL; /* sctp_chunk_free already freed the skb */ goto discard_release; } - SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG); + SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_BACKLOG); } else { - SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ); + SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_SOFTIRQ); sctp_inq_push(&chunk->rcvr->inqueue, chunk); } @@ -293,7 +293,7 @@ int sctp_rcv(struct sk_buff *skb) return 0; discard_it: - SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_DISCARDS); + SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_DISCARDS); kfree_skb(skb); return 0; @@ -543,7 +543,7 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb, * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(&init_net, LINUX_MIB_LOCKDROPPEDICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); *app = asoc; *tpp = transport; @@ -593,7 +593,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) struct net *net = dev_net(skb->dev); if (skb->len < ihlen + 8) { - ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } @@ -607,7 +607,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { - ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } /* Warning: The sock lock is held. Remember to call diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index a18cda60e156..ea14cb445295 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -169,7 +169,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { - ICMP6_INC_STATS_BH(dev_net(skb->dev), idev, ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_INERRORS); goto out; } @@ -243,7 +243,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) __func__, skb, skb->len, &fl6.saddr, &fl6.daddr); - SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); + SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); if (!(transport->param_flags & SPP_PMTUD_ENABLE)) skb->local_df = 1; diff --git a/net/sctp/output.c b/net/sctp/output.c index 838e18b4d7ea..0c6359bb973c 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -597,7 +597,7 @@ out: return err; no_route: kfree_skb(nskb); - IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS_BH(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); /* FIXME: Returning the 'err' will effect all the associations * associated with a socket, although only one of the paths of the diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index e7aa177c9522..072bf6ae3c26 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -299,6 +299,7 @@ void sctp_outq_free(struct sctp_outq *q) /* Put a new chunk in an sctp_outq. */ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) { + struct net *net = sock_net(q->asoc->base.sk); int error = 0; SCTP_DEBUG_PRINTK("sctp_outq_tail(%p, %p[%s])\n", @@ -337,15 +338,15 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) sctp_outq_tail_data(q, chunk); if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) - SCTP_INC_STATS(SCTP_MIB_OUTUNORDERCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS); else - SCTP_INC_STATS(SCTP_MIB_OUTORDERCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS); q->empty = 0; break; } } else { list_add_tail(&chunk->list, &q->control_chunk_list); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); } if (error < 0) @@ -478,11 +479,12 @@ void sctp_retransmit_mark(struct sctp_outq *q, void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, sctp_retransmit_reason_t reason) { + struct net *net = sock_net(q->asoc->base.sk); int error = 0; switch(reason) { case SCTP_RTXR_T3_RTX: - SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS); + SCTP_INC_STATS(net, SCTP_MIB_T3_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX); /* Update the retran path if the T3-rtx timer has expired for * the current retran path. @@ -493,15 +495,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, transport->asoc->unack_data; break; case SCTP_RTXR_FAST_RTX: - SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); + SCTP_INC_STATS(net, SCTP_MIB_FAST_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); q->fast_rtx = 1; break; case SCTP_RTXR_PMTUD: - SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); + SCTP_INC_STATS(net, SCTP_MIB_PMTUD_RETRANSMITS); break; case SCTP_RTXR_T1_RTX: - SCTP_INC_STATS(SCTP_MIB_T1_RETRANSMITS); + SCTP_INC_STATS(net, SCTP_MIB_T1_RETRANSMITS); transport->asoc->init_retries++; break; default: @@ -1914,6 +1916,6 @@ static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn) if (ftsn_chunk) { list_add_tail(&ftsn_chunk->list, &q->control_chunk_list); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_OUTCTRLCHUNKS); } } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index dc79a3aa0382..3e62ee55228f 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -83,12 +83,9 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) struct net *net = seq->private; int i; - if (!net_eq(net, &init_net)) - return 0; - for (i = 0; sctp_snmp_list[i].name != NULL; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field((void __percpu **)sctp_statistics, + snmp_fold_field((void __percpu **)net->sctp.sctp_statistics, sctp_snmp_list[i].entry)); return 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 059c914c09f2..d58db315db85 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -69,7 +69,6 @@ /* Global data structures. */ struct sctp_globals sctp_globals __read_mostly; -DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly; struct idr sctp_assocs_id; DEFINE_SPINLOCK(sctp_assocs_id_lock); @@ -961,7 +960,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT; - SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); + SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS); return ip_queue_xmit(skb, &transport->fl); } @@ -1102,16 +1101,16 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family) return 1; } -static inline int init_sctp_mibs(void) +static inline int init_sctp_mibs(struct net *net) { - return snmp_mib_init((void __percpu **)sctp_statistics, + return snmp_mib_init((void __percpu **)net->sctp.sctp_statistics, sizeof(struct sctp_mib), __alignof__(struct sctp_mib)); } -static inline void cleanup_sctp_mibs(void) +static inline void cleanup_sctp_mibs(struct net *net) { - snmp_mib_free((void __percpu **)sctp_statistics); + snmp_mib_free((void __percpu **)net->sctp.sctp_statistics); } static void sctp_v4_pf_init(void) @@ -1170,6 +1169,11 @@ static int sctp_net_init(struct net *net) { int status; + /* Allocate and initialise sctp mibs. */ + status = init_sctp_mibs(net); + if (status) + goto err_init_mibs; + /* Initialize proc fs directory. */ status = sctp_proc_init(net); if (status) @@ -1202,6 +1206,8 @@ err_ctl_sock_init: sctp_dbg_objcnt_exit(net); sctp_proc_exit(net); err_init_proc: + cleanup_sctp_mibs(net); +err_init_mibs: return status; } @@ -1217,6 +1223,7 @@ static void sctp_net_exit(struct net *net) sctp_dbg_objcnt_exit(net); sctp_proc_exit(net); + cleanup_sctp_mibs(net); } static struct pernet_operations sctp_net_ops = { @@ -1254,11 +1261,6 @@ SCTP_STATIC __init int sctp_init(void) if (!sctp_chunk_cachep) goto err_chunk_cachep; - /* Allocate and initialise sctp mibs. */ - status = init_sctp_mibs(); - if (status) - goto err_init_mibs; - status = percpu_counter_init(&sctp_sockets_allocated, 0); if (status) goto err_percpu_counter_init; @@ -1474,8 +1476,6 @@ err_ehash_alloc: err_ahash_alloc: percpu_counter_destroy(&sctp_sockets_allocated); err_percpu_counter_init: - cleanup_sctp_mibs(); -err_init_mibs: kmem_cache_destroy(sctp_chunk_cachep); err_chunk_cachep: kmem_cache_destroy(sctp_bucket_cachep); @@ -1514,7 +1514,6 @@ SCTP_STATIC __exit void sctp_exit(void) sizeof(struct sctp_bind_hashbucket))); percpu_counter_destroy(&sctp_sockets_allocated); - cleanup_sctp_mibs(); rcu_barrier(); /* Wait for completion of call_rcu()'s */ diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index f2daf615e8fe..bee5e2c288d8 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -213,6 +213,7 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, { struct sctp_chunk *chunk = arg; struct sctp_ulpevent *ev; + struct net *net; if (!sctp_vtag_verify_either(chunk, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); @@ -260,8 +261,9 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + net = sock_net(asoc->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); @@ -322,7 +324,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, */ net = sock_net(ep->base.sk); if (ep == sctp_sk(net->sctp.ctl_sock)->ep) { - SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); + SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); } @@ -369,7 +371,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); return SCTP_DISPOSITION_CONSUME; } else { return SCTP_DISPOSITION_NOMEM; @@ -540,7 +542,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_OUTCTRLCHUNKS); error = SCTP_ERROR_INV_PARAM; } } @@ -559,7 +561,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_ABORTEDS); return sctp_stop_t1_and_abort(commands, error, ECONNREFUSED, asoc, chunk->transport); } @@ -656,7 +658,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, */ net = sock_net(ep->base.sk); if (ep == sctp_sk(net->sctp.ctl_sock)->ep) { - SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); + SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); } @@ -809,8 +811,8 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); - SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS); + SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_PASSIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); if (new_asoc->autoclose) @@ -868,6 +870,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep, { struct sctp_chunk *chunk = arg; struct sctp_ulpevent *ev; + struct net *net; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); @@ -897,8 +900,9 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); - SCTP_INC_STATS(SCTP_MIB_ACTIVEESTABS); + net = sock_net(ep->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ACTIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); if (asoc->autoclose) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, @@ -972,13 +976,15 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, struct sctp_transport *transport = (struct sctp_transport *) arg; if (asoc->overall_error_count >= asoc->max_retrans) { + struct net *net; sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + net = sock_net(ep->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; } @@ -1213,7 +1219,7 @@ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa, goto out; sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(pkt)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); /* Discard the rest of the inbound packet. */ sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); @@ -1427,7 +1433,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( if (packet) { sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_OUTCTRLCHUNKS); retval = SCTP_DISPOSITION_CONSUME; } else { retval = SCTP_DISPOSITION_NOMEM; @@ -1791,7 +1797,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(sock_net(new_asoc->base.sk), SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); repl = sctp_make_cookie_ack(new_asoc, chunk); @@ -1883,7 +1889,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); @@ -2417,6 +2423,7 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; unsigned int len; __be16 error = SCTP_ERROR_NO_ERROR; + struct net *net; /* See if we have an error cause code in the chunk. */ len = ntohs(chunk->chunk_hdr->length); @@ -2433,8 +2440,9 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET)); /* ASSOC_FAILED will DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(error)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + net = sock_net(ep->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } @@ -2521,7 +2529,7 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, SCTP_DEBUG_PRINTK("ABORT received (INIT).\n"); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_ABORTEDS); sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(sk_err)); @@ -2904,11 +2912,11 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, break; case SCTP_IERROR_HIGH_TSN: case SCTP_IERROR_BAD_STREAM: - SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_noforce; case SCTP_IERROR_DUP_TSN: case SCTP_IERROR_IGNORE_TSN: - SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_force; case SCTP_IERROR_NO_DATA: goto consume; @@ -3197,7 +3205,7 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); sctp_sf_pdiscard(ep, asoc, type, arg, commands); return SCTP_DISPOSITION_CONSUME; @@ -3260,6 +3268,7 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, struct sctp_chunk *chunk = arg; struct sctp_chunk *reply; struct sctp_ulpevent *ev; + struct net *net; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); @@ -3299,8 +3308,9 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + net = sock_net(asoc->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); /* ...and remove all record of the association. */ @@ -3346,8 +3356,10 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep, __u8 *ch_end; int ootb_shut_ack = 0; int ootb_cookie_ack = 0; + struct net *net; - SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); + net = sock_net(asoc->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); ch = (sctp_chunkhdr_t *) chunk->chunk_hdr; do { @@ -3461,7 +3473,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); /* If the chunk length is invalid, we don't want to process * the reset of the packet. @@ -3508,7 +3520,7 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep, * packet and the state function that handles OOTB SHUTDOWN_ACK is * called with a NULL association. */ - SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); + SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_OUTOFBLUES); return sctp_sf_shut_8_4_5(ep, NULL, type, arg, commands); } @@ -3699,6 +3711,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, */ if (ADDIP_SERIAL_gte(rcvd_serial, sent_serial + 1) && !(asoc->addip_last_asconf)) { + struct net *net; abort = sctp_make_abort(asoc, asconf_ack, sizeof(sctp_errhdr_t)); if (abort) { @@ -3716,12 +3729,14 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_ASCONF_ACK)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + net = sock_net(asoc->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } if ((rcvd_serial == sent_serial) && asoc->addip_last_asconf) { + struct net *net; sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); @@ -3750,8 +3765,9 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_ASCONF_ACK)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + net = sock_net(asoc->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } @@ -4222,7 +4238,7 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { - SCTP_INC_STATS(SCTP_MIB_IN_PKT_DISCARDS); + SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_IN_PKT_DISCARDS); sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; @@ -4315,7 +4331,7 @@ static sctp_disposition_t sctp_sf_abort_violation( } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); if (asoc->state <= SCTP_STATE_COOKIE_ECHOED) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, @@ -4329,7 +4345,7 @@ static sctp_disposition_t sctp_sf_abort_violation( SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); } } else { packet = sctp_ootb_pkt_new(net, asoc, chunk); @@ -4347,10 +4363,10 @@ static sctp_disposition_t sctp_sf_abort_violation( sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); } - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); discard: sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); @@ -4410,6 +4426,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen( struct sctp_chunk *chunk = arg; struct sctp_paramhdr *param = ext; struct sctp_chunk *abort = NULL; + struct net *net; if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) goto discard; @@ -4419,15 +4436,16 @@ static sctp_disposition_t sctp_sf_violation_paramlen( if (!abort) goto nomem; + net = sock_net(asoc->base.sk); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); discard: sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); @@ -4757,6 +4775,7 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( */ struct sctp_chunk *abort = arg; sctp_disposition_t retval; + struct net *net; retval = SCTP_DISPOSITION_CONSUME; @@ -4772,8 +4791,9 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_USER_ABORT)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + net = sock_net(asoc->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return retval; } @@ -4824,13 +4844,15 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown( void *arg, sctp_cmd_seq_t *commands) { + struct net *net = sock_net(asoc->base.sk); + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); + SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS); sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); @@ -4886,6 +4908,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( { struct sctp_chunk *abort = arg; sctp_disposition_t retval; + struct net *net = sock_net(asoc->base.sk); /* Stop T1-init timer */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, @@ -4897,7 +4920,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); /* Even if we can't send the ABORT due to low memory delete the * TCB. This is a departure from our typical NOMEM handling. @@ -5318,8 +5341,9 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, sctp_cmd_seq_t *commands) { struct sctp_transport *transport = arg; + struct net *net = sock_net(asoc->base.sk); - SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T3_RTX_EXPIREDS); if (asoc->overall_error_count >= asoc->max_retrans) { if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) { @@ -5340,8 +5364,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, /* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; } } @@ -5403,7 +5427,8 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { - SCTP_INC_STATS(SCTP_MIB_DELAY_SACK_EXPIREDS); + struct net *net = sock_net(asoc->base.sk); + SCTP_INC_STATS(net, SCTP_MIB_DELAY_SACK_EXPIREDS); sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE()); return SCTP_DISPOSITION_CONSUME; } @@ -5436,9 +5461,10 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, struct sctp_chunk *repl = NULL; struct sctp_bind_addr *bp; int attempts = asoc->init_err_counter + 1; + struct net *net = sock_net(asoc->base.sk); SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); - SCTP_INC_STATS(SCTP_MIB_T1_INIT_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T1_INIT_EXPIREDS); if (attempts <= asoc->max_init_attempts) { bp = (struct sctp_bind_addr *) &asoc->base.bind_addr; @@ -5496,9 +5522,10 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep { struct sctp_chunk *repl = NULL; int attempts = asoc->init_err_counter + 1; + struct net *net = sock_net(asoc->base.sk); SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); - SCTP_INC_STATS(SCTP_MIB_T1_COOKIE_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T1_COOKIE_EXPIREDS); if (attempts <= asoc->max_init_attempts) { repl = sctp_make_cookie_echo(asoc, NULL); @@ -5543,9 +5570,10 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, sctp_cmd_seq_t *commands) { struct sctp_chunk *reply = NULL; + struct net *net = sock_net(asoc->base.sk); SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); - SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T2_SHUTDOWN_EXPIREDS); ((struct sctp_association *)asoc)->shutdown_retries++; @@ -5555,8 +5583,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, /* Note: CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; } @@ -5613,8 +5641,9 @@ sctp_disposition_t sctp_sf_t4_timer_expire( { struct sctp_chunk *chunk = asoc->addip_last_asconf; struct sctp_transport *transport = chunk->transport; + struct net *net = sock_net(asoc->base.sk); - SCTP_INC_STATS(SCTP_MIB_T4_RTO_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T4_RTO_EXPIREDS); /* ADDIP 4.1 B1) Increment the error counters and perform path failure * detection on the appropriate destination address as defined in @@ -5639,8 +5668,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire( SCTP_ERROR(ETIMEDOUT)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } @@ -5682,9 +5711,10 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, sctp_cmd_seq_t *commands) { struct sctp_chunk *reply = NULL; + struct net *net = sock_net(asoc->base.sk); SCTP_DEBUG_PRINTK("Timer T5 expired.\n"); - SCTP_INC_STATS(SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); reply = sctp_make_abort(asoc, NULL, 0); if (!reply) @@ -5696,8 +5726,8 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_DELETE_TCB; nomem: @@ -5716,9 +5746,10 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( void *arg, sctp_cmd_seq_t *commands) { + struct net *net = sock_net(asoc->base.sk); int disposition; - SCTP_INC_STATS(SCTP_MIB_AUTOCLOSE_EXPIREDS); + SCTP_INC_STATS(net, SCTP_MIB_AUTOCLOSE_EXPIREDS); /* From 9.2 Shutdown of an Association * Upon receipt of the SHUTDOWN primitive from its upper @@ -5976,7 +6007,7 @@ static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep, sctp_packet_append_chunk(packet, err_chunk); sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(packet)); - SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); } else sctp_chunk_free (err_chunk); } @@ -5996,6 +6027,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, __u32 tsn; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; struct sock *sk = asoc->base.sk; + struct net *net; u16 ssn; u16 sid; u8 ordered = 0; @@ -6112,6 +6144,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, * No User Data: This error cause is returned to the originator of a * DATA chunk if a received DATA chunk has no user data. */ + net = sock_net(sk); if (unlikely(0 == datalen)) { err = sctp_make_abort_no_data(asoc, chunk, tsn); if (err) { @@ -6126,8 +6159,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, SCTP_ERROR(ECONNABORTED)); sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_DATA)); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB); return SCTP_IERROR_NO_DATA; } @@ -6137,9 +6170,9 @@ static int sctp_eat_data(const struct sctp_association *asoc, * if we renege and the chunk arrives again. */ if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) - SCTP_INC_STATS(SCTP_MIB_INUNORDERCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_INUNORDERCHUNKS); else { - SCTP_INC_STATS(SCTP_MIB_INORDERCHUNKS); + SCTP_INC_STATS(net, SCTP_MIB_INORDERCHUNKS); ordered = 1; } diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index f5a6a4f4faf7..360d8697b95c 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -326,7 +326,9 @@ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq, * payload was fragmented on the way and ip had to reassemble them. * We add the rest of skb's to the first skb's fraglist. */ -static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag) +static struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net, + struct sk_buff_head *queue, struct sk_buff *f_frag, + struct sk_buff *l_frag) { struct sk_buff *pos; struct sk_buff *new = NULL; @@ -394,7 +396,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *qu } event = sctp_skb2event(f_frag); - SCTP_INC_STATS(SCTP_MIB_REASMUSRMSGS); + SCTP_INC_STATS(net, SCTP_MIB_REASMUSRMSGS); return event; } @@ -493,7 +495,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul cevent = sctp_skb2event(pd_first); pd_point = sctp_sk(asoc->base.sk)->pd_point; if (pd_point && pd_point <= pd_len) { - retval = sctp_make_reassembled_event(&ulpq->reasm, + retval = sctp_make_reassembled_event(sock_net(asoc->base.sk), + &ulpq->reasm, pd_first, pd_last); if (retval) @@ -503,7 +506,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul done: return retval; found: - retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, pos); + retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + &ulpq->reasm, first_frag, pos); if (retval) retval->msg_flags |= MSG_EOR; goto done; @@ -563,7 +567,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq) * further. */ done: - retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag); + retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + &ulpq->reasm, first_frag, last_frag); if (retval && is_last) retval->msg_flags |= MSG_EOR; @@ -655,7 +660,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq) * further. */ done: - retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag); + retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk), + &ulpq->reasm, first_frag, last_frag); return retval; } -- cgit v1.2.3 From ebb7e95d9351f77a8ec1fca20eb645051401b7b2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Aug 2012 07:23:59 +0000 Subject: sctp: Add infrastructure for per net sysctls Start with an empty sctp_net_table that will be populated as the various tunable sysctls are made per net. Signed-off-by: "Eric W. Biederman" Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/netns/sctp.h | 6 +++++- include/net/sctp/sctp.h | 4 ++++ net/sctp/protocol.c | 7 +++++++ net/sctp/sysctl.c | 21 +++++++++++++++++++++ 4 files changed, 37 insertions(+), 1 deletion(-) (limited to 'include/net/netns') diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 06ccddf9566c..9576b60cbd25 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -4,6 +4,7 @@ struct sock; struct proc_dir_entry; struct sctp_mib; +struct ctl_table_header; struct netns_sctp { DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics); @@ -11,7 +12,9 @@ struct netns_sctp { #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_net_sctp; #endif - +#ifdef CONFIG_SYSCTL + struct ctl_table_header *sysctl_header; +#endif /* This is the global socket data structure used for responding to * the Out-of-the-blue (OOTB) packets. A control sock will be created * for this socket at the initialization time. @@ -32,6 +35,7 @@ struct netns_sctp { /* Lock that protects the local_addr_list writers */ spinlock_t local_addr_lock; + }; #endif /* __NETNS_SCTP_H__ */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 7dcd4dfd7c3f..c07421d1772f 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -375,9 +375,13 @@ static inline void sctp_dbg_objcnt_exit(struct net *) { return; } #if defined CONFIG_SYSCTL void sctp_sysctl_register(void); void sctp_sysctl_unregister(void); +int sctp_sysctl_net_register(struct net *net); +void sctp_sysctl_net_unregister(struct net *net); #else static inline void sctp_sysctl_register(void) { return; } static inline void sctp_sysctl_unregister(void) { return; } +static inline int sctp_sysctl_net_register(struct net *net) { return 0; } +static inline void sctp_sysctl_net_unregister(struct net *net) { return; } #endif /* Size of Supported Address Parameter for 'x' address types. */ diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index d58db315db85..0f2342be61f3 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1169,6 +1169,10 @@ static int sctp_net_init(struct net *net) { int status; + status = sctp_sysctl_net_register(net); + if (status) + goto err_sysctl_register; + /* Allocate and initialise sctp mibs. */ status = init_sctp_mibs(net); if (status) @@ -1208,6 +1212,8 @@ err_ctl_sock_init: err_init_proc: cleanup_sctp_mibs(net); err_init_mibs: + sctp_sysctl_net_unregister(net); +err_sysctl_register: return status; } @@ -1224,6 +1230,7 @@ static void sctp_net_exit(struct net *net) sctp_proc_exit(net); cleanup_sctp_mibs(net); + sctp_sysctl_net_unregister(net); } static struct pernet_operations sctp_net_ops = { diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 2b2bfe933ff1..bee36c408dde 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -284,6 +284,27 @@ static ctl_table sctp_table[] = { { /* sentinel */ } }; +static ctl_table sctp_net_table[] = { + { /* sentinel */ } +}; + +int sctp_sysctl_net_register(struct net *net) +{ + struct ctl_table *table; + + table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table); + return 0; +} + +void sctp_sysctl_net_unregister(struct net *net) +{ + unregister_net_sysctl_table(net->sctp.sysctl_header); +} + static struct ctl_table_header * sctp_sysctl_header; /* Sysctl registration. */ -- cgit v1.2.3 From e1fc3b14f9a90d9591016749289f2c3d7b35fbf4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Aug 2012 07:29:57 +0000 Subject: sctp: Make sysctl tunables per net Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/net/netns/sctp.h | 90 ++++++++++++++++++++++ include/net/sctp/structs.h | 116 ---------------------------- net/sctp/associola.c | 10 ++- net/sctp/auth.c | 20 ++++- net/sctp/bind_addr.c | 2 +- net/sctp/endpointola.c | 9 ++- net/sctp/input.c | 2 +- net/sctp/protocol.c | 128 +++++++++++++++---------------- net/sctp/sm_make_chunk.c | 47 ++++++------ net/sctp/sm_statefuns.c | 4 +- net/sctp/sm_statetable.c | 6 +- net/sctp/socket.c | 65 +++++++++------- net/sctp/sysctl.c | 185 +++++++++++++++++++++++---------------------- net/sctp/transport.c | 15 ++-- 14 files changed, 355 insertions(+), 344 deletions(-) (limited to 'include/net/netns') diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 9576b60cbd25..5e5eb1f9f14b 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -36,6 +36,96 @@ struct netns_sctp { /* Lock that protects the local_addr_list writers */ spinlock_t local_addr_lock; + /* RFC2960 Section 14. Suggested SCTP Protocol Parameter Values + * + * The following protocol parameters are RECOMMENDED: + * + * RTO.Initial - 3 seconds + * RTO.Min - 1 second + * RTO.Max - 60 seconds + * RTO.Alpha - 1/8 (3 when converted to right shifts.) + * RTO.Beta - 1/4 (2 when converted to right shifts.) + */ + unsigned int rto_initial; + unsigned int rto_min; + unsigned int rto_max; + + /* Note: rto_alpha and rto_beta are really defined as inverse + * powers of two to facilitate integer operations. + */ + int rto_alpha; + int rto_beta; + + /* Max.Burst - 4 */ + int max_burst; + + /* Whether Cookie Preservative is enabled(1) or not(0) */ + int cookie_preserve_enable; + + /* Valid.Cookie.Life - 60 seconds */ + unsigned int valid_cookie_life; + + /* Delayed SACK timeout 200ms default*/ + unsigned int sack_timeout; + + /* HB.interval - 30 seconds */ + unsigned int hb_interval; + + /* Association.Max.Retrans - 10 attempts + * Path.Max.Retrans - 5 attempts (per destination address) + * Max.Init.Retransmits - 8 attempts + */ + int max_retrans_association; + int max_retrans_path; + int max_retrans_init; + /* Potentially-Failed.Max.Retrans sysctl value + * taken from: + * http://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05 + */ + int pf_retrans; + + /* + * Policy for preforming sctp/socket accounting + * 0 - do socket level accounting, all assocs share sk_sndbuf + * 1 - do sctp accounting, each asoc may use sk_sndbuf bytes + */ + int sndbuf_policy; + + /* + * Policy for preforming sctp/socket accounting + * 0 - do socket level accounting, all assocs share sk_rcvbuf + * 1 - do sctp accounting, each asoc may use sk_rcvbuf bytes + */ + int rcvbuf_policy; + + int default_auto_asconf; + + /* Flag to indicate if addip is enabled. */ + int addip_enable; + int addip_noauth; + + /* Flag to indicate if PR-SCTP is enabled. */ + int prsctp_enable; + + /* Flag to idicate if SCTP-AUTH is enabled */ + int auth_enable; + + /* + * Policy to control SCTP IPv4 address scoping + * 0 - Disable IPv4 address scoping + * 1 - Enable IPv4 address scoping + * 2 - Selectively allow only IPv4 private addresses + * 3 - Selectively allow only IPv4 link local address + */ + int scope_policy; + + /* Threshold for rwnd update SACKS. Receive buffer shifted this many + * bits is an indicator of when to send and window update SACK. + */ + int rwnd_upd_shift; + + /* Threshold for autoclose timeout, in seconds. */ + unsigned long max_autoclose; }; #endif /* __NETNS_SCTP_H__ */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 18052b421203..0fef00f5d3ce 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -119,69 +119,6 @@ struct sctp_hashbucket { /* The SCTP globals structure. */ extern struct sctp_globals { - /* RFC2960 Section 14. Suggested SCTP Protocol Parameter Values - * - * The following protocol parameters are RECOMMENDED: - * - * RTO.Initial - 3 seconds - * RTO.Min - 1 second - * RTO.Max - 60 seconds - * RTO.Alpha - 1/8 (3 when converted to right shifts.) - * RTO.Beta - 1/4 (2 when converted to right shifts.) - */ - unsigned int rto_initial; - unsigned int rto_min; - unsigned int rto_max; - - /* Note: rto_alpha and rto_beta are really defined as inverse - * powers of two to facilitate integer operations. - */ - int rto_alpha; - int rto_beta; - - /* Max.Burst - 4 */ - int max_burst; - - /* Whether Cookie Preservative is enabled(1) or not(0) */ - int cookie_preserve_enable; - - /* Valid.Cookie.Life - 60 seconds */ - unsigned int valid_cookie_life; - - /* Delayed SACK timeout 200ms default*/ - unsigned int sack_timeout; - - /* HB.interval - 30 seconds */ - unsigned int hb_interval; - - /* Association.Max.Retrans - 10 attempts - * Path.Max.Retrans - 5 attempts (per destination address) - * Max.Init.Retransmits - 8 attempts - */ - int max_retrans_association; - int max_retrans_path; - int max_retrans_init; - - /* Potentially-Failed.Max.Retrans sysctl value - * taken from: - * http://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05 - */ - int pf_retrans; - - /* - * Policy for preforming sctp/socket accounting - * 0 - do socket level accounting, all assocs share sk_sndbuf - * 1 - do sctp accounting, each asoc may use sk_sndbuf bytes - */ - int sndbuf_policy; - - /* - * Policy for preforming sctp/socket accounting - * 0 - do socket level accounting, all assocs share sk_rcvbuf - * 1 - do sctp accounting, each asoc may use sk_rcvbuf bytes - */ - int rcvbuf_policy; - /* The following variables are implementation specific. */ /* Default initialization values to be applied to new associations. */ @@ -205,56 +142,11 @@ extern struct sctp_globals { int port_hashsize; struct sctp_bind_hashbucket *port_hashtable; - int default_auto_asconf; - - /* Flag to indicate if addip is enabled. */ - int addip_enable; - int addip_noauth_enable; - - /* Flag to indicate if PR-SCTP is enabled. */ - int prsctp_enable; - - /* Flag to idicate if SCTP-AUTH is enabled */ - int auth_enable; - - /* - * Policy to control SCTP IPv4 address scoping - * 0 - Disable IPv4 address scoping - * 1 - Enable IPv4 address scoping - * 2 - Selectively allow only IPv4 private addresses - * 3 - Selectively allow only IPv4 link local address - */ - int ipv4_scope_policy; - /* Flag to indicate whether computing and verifying checksum * is disabled. */ bool checksum_disable; - - /* Threshold for rwnd update SACKS. Receive buffer shifted this many - * bits is an indicator of when to send and window update SACK. - */ - int rwnd_update_shift; - - /* Threshold for autoclose timeout, in seconds. */ - unsigned long max_autoclose; } sctp_globals; -#define sctp_rto_initial (sctp_globals.rto_initial) -#define sctp_rto_min (sctp_globals.rto_min) -#define sctp_rto_max (sctp_globals.rto_max) -#define sctp_rto_alpha (sctp_globals.rto_alpha) -#define sctp_rto_beta (sctp_globals.rto_beta) -#define sctp_max_burst (sctp_globals.max_burst) -#define sctp_valid_cookie_life (sctp_globals.valid_cookie_life) -#define sctp_cookie_preserve_enable (sctp_globals.cookie_preserve_enable) -#define sctp_max_retrans_association (sctp_globals.max_retrans_association) -#define sctp_sndbuf_policy (sctp_globals.sndbuf_policy) -#define sctp_rcvbuf_policy (sctp_globals.rcvbuf_policy) -#define sctp_max_retrans_path (sctp_globals.max_retrans_path) -#define sctp_pf_retrans (sctp_globals.pf_retrans) -#define sctp_max_retrans_init (sctp_globals.max_retrans_init) -#define sctp_sack_timeout (sctp_globals.sack_timeout) -#define sctp_hb_interval (sctp_globals.hb_interval) #define sctp_max_instreams (sctp_globals.max_instreams) #define sctp_max_outstreams (sctp_globals.max_outstreams) #define sctp_address_families (sctp_globals.address_families) @@ -264,15 +156,7 @@ extern struct sctp_globals { #define sctp_assoc_hashtable (sctp_globals.assoc_hashtable) #define sctp_port_hashsize (sctp_globals.port_hashsize) #define sctp_port_hashtable (sctp_globals.port_hashtable) -#define sctp_default_auto_asconf (sctp_globals.default_auto_asconf) -#define sctp_scope_policy (sctp_globals.ipv4_scope_policy) -#define sctp_addip_enable (sctp_globals.addip_enable) -#define sctp_addip_noauth (sctp_globals.addip_noauth_enable) -#define sctp_prsctp_enable (sctp_globals.prsctp_enable) -#define sctp_auth_enable (sctp_globals.auth_enable) #define sctp_checksum_disable (sctp_globals.checksum_disable) -#define sctp_rwnd_upd_shift (sctp_globals.rwnd_update_shift) -#define sctp_max_autoclose (sctp_globals.max_autoclose) /* SCTP Socket type: UDP or TCP style. */ typedef enum { diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 93a4513c85e0..b1ef3bc301a5 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -82,6 +82,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a sctp_scope_t scope, gfp_t gfp) { + struct net *net = sock_net(sk); struct sctp_sock *sp; int i; sctp_paramhdr_t *p; @@ -124,7 +125,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a * socket values. */ asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt; - asoc->pf_retrans = sctp_pf_retrans; + asoc->pf_retrans = net->sctp.pf_retrans; asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial); asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max); @@ -175,7 +176,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = - min_t(unsigned long, sp->autoclose, sctp_max_autoclose) * HZ; + min_t(unsigned long, sp->autoclose, net->sctp.max_autoclose) * HZ; /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) @@ -281,7 +282,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a * and will revert old behavior. */ asoc->peer.asconf_capable = 0; - if (sctp_addip_noauth) + if (net->sctp.addip_noauth) asoc->peer.asconf_capable = 1; asoc->asconf_addr_del_pending = NULL; asoc->src_out_of_asoc_ok = 0; @@ -1418,6 +1419,7 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) /* Should we send a SACK to update our peer? */ static inline int sctp_peer_needs_update(struct sctp_association *asoc) { + struct net *net = sock_net(asoc->base.sk); switch (asoc->state) { case SCTP_STATE_ESTABLISHED: case SCTP_STATE_SHUTDOWN_PENDING: @@ -1425,7 +1427,7 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc) case SCTP_STATE_SHUTDOWN_SENT: if ((asoc->rwnd > asoc->a_rwnd) && ((asoc->rwnd - asoc->a_rwnd) >= max_t(__u32, - (asoc->base.sk->sk_rcvbuf >> sctp_rwnd_upd_shift), + (asoc->base.sk->sk_rcvbuf >> net->sctp.rwnd_upd_shift), asoc->pathmtu))) return 1; break; diff --git a/net/sctp/auth.c b/net/sctp/auth.c index bf812048cf6f..aaa6c121ecce 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -392,13 +392,14 @@ nomem: */ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) { + struct net *net = sock_net(asoc->base.sk); struct sctp_auth_bytes *secret; struct sctp_shared_key *ep_key; /* If we don't support AUTH, or peer is not capable * we don't need to do anything. */ - if (!sctp_auth_enable || !asoc->peer.auth_capable) + if (!net->sctp.auth_enable || !asoc->peer.auth_capable) return 0; /* If the key_id is non-zero and we couldn't find an @@ -445,11 +446,12 @@ struct sctp_shared_key *sctp_auth_get_shkey( */ int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp) { + struct net *net = sock_net(ep->base.sk); struct crypto_hash *tfm = NULL; __u16 id; /* if the transforms are already allocted, we are done */ - if (!sctp_auth_enable) { + if (!net->sctp.auth_enable) { ep->auth_hmacs = NULL; return 0; } @@ -674,7 +676,12 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param) /* Check if peer requested that this chunk is authenticated */ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) { - if (!sctp_auth_enable || !asoc || !asoc->peer.auth_capable) + struct net *net; + if (!asoc) + return 0; + + net = sock_net(asoc->base.sk); + if (!net->sctp.auth_enable || !asoc->peer.auth_capable) return 0; return __sctp_auth_cid(chunk, asoc->peer.peer_chunks); @@ -683,7 +690,12 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc) /* Check if we requested that peer authenticate this chunk. */ int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc) { - if (!sctp_auth_enable || !asoc) + struct net *net; + if (!asoc) + return 0; + + net = sock_net(asoc->base.sk); + if (!net->sctp.auth_enable); return 0; return __sctp_auth_cid(chunk, diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 23389ba44e39..d886b3bf84f5 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -512,7 +512,7 @@ int sctp_in_scope(struct net *net, const union sctp_addr *addr, sctp_scope_t sco * Address scoping can be selectively controlled via sysctl * option */ - switch (sctp_scope_policy) { + switch (net->sctp.scope_policy) { case SCTP_SCOPE_POLICY_DISABLE: return 1; case SCTP_SCOPE_POLICY_ENABLE: diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 8315792ef2ba..1859e2bc83d1 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -65,6 +65,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, struct sock *sk, gfp_t gfp) { + struct net *net = sock_net(sk); struct sctp_hmac_algo_param *auth_hmacs = NULL; struct sctp_chunks_param *auth_chunks = NULL; struct sctp_shared_key *null_key; @@ -74,7 +75,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, if (!ep->digest) return NULL; - if (sctp_auth_enable) { + if (net->sctp.auth_enable) { /* Allocate space for HMACS and CHUNKS authentication * variables. There are arrays that we encode directly * into parameters to make the rest of the operations easier. @@ -106,7 +107,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* If the Add-IP functionality is enabled, we must * authenticate, ASCONF and ASCONF-ACK chunks */ - if (sctp_addip_enable) { + if (net->sctp.addip_enable) { auth_chunks->chunks[0] = SCTP_CID_ASCONF; auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK; auth_chunks->param_hdr.length = @@ -140,14 +141,14 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, INIT_LIST_HEAD(&ep->asocs); /* Use SCTP specific send buffer space queues. */ - ep->sndbuf_policy = sctp_sndbuf_policy; + ep->sndbuf_policy = net->sctp.sndbuf_policy; sk->sk_data_ready = sctp_data_ready; sk->sk_write_space = sctp_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); /* Get the receive buffer policy for this endpoint */ - ep->rcvbuf_policy = sctp_rcvbuf_policy; + ep->rcvbuf_policy = net->sctp.rcvbuf_policy; /* Initialize the secret key used with cookie. */ get_random_bytes(&ep->secret_key[0], SCTP_SECRET_SIZE); diff --git a/net/sctp/input.c b/net/sctp/input.c index a2ceb70ee06c..25dfe7380479 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -1094,7 +1094,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, break; case SCTP_CID_ASCONF: - if (have_auth || sctp_addip_noauth) + if (have_auth || net->sctp.addip_noauth) asoc = __sctp_rcv_asconf_lookup( net, ch, laddr, sctp_hdr(skb)->source, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 59965bdea07a..2d518425d598 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1169,6 +1169,70 @@ static int sctp_net_init(struct net *net) { int status; + /* + * 14. Suggested SCTP Protocol Parameter Values + */ + /* The following protocol parameters are RECOMMENDED: */ + /* RTO.Initial - 3 seconds */ + net->sctp.rto_initial = SCTP_RTO_INITIAL; + /* RTO.Min - 1 second */ + net->sctp.rto_min = SCTP_RTO_MIN; + /* RTO.Max - 60 seconds */ + net->sctp.rto_max = SCTP_RTO_MAX; + /* RTO.Alpha - 1/8 */ + net->sctp.rto_alpha = SCTP_RTO_ALPHA; + /* RTO.Beta - 1/4 */ + net->sctp.rto_beta = SCTP_RTO_BETA; + + /* Valid.Cookie.Life - 60 seconds */ + net->sctp.valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE; + + /* Whether Cookie Preservative is enabled(1) or not(0) */ + net->sctp.cookie_preserve_enable = 1; + + /* Max.Burst - 4 */ + net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST; + + /* Association.Max.Retrans - 10 attempts + * Path.Max.Retrans - 5 attempts (per destination address) + * Max.Init.Retransmits - 8 attempts + */ + net->sctp.max_retrans_association = 10; + net->sctp.max_retrans_path = 5; + net->sctp.max_retrans_init = 8; + + /* Sendbuffer growth - do per-socket accounting */ + net->sctp.sndbuf_policy = 0; + + /* Rcvbuffer growth - do per-socket accounting */ + net->sctp.rcvbuf_policy = 0; + + /* HB.interval - 30 seconds */ + net->sctp.hb_interval = SCTP_DEFAULT_TIMEOUT_HEARTBEAT; + + /* delayed SACK timeout */ + net->sctp.sack_timeout = SCTP_DEFAULT_TIMEOUT_SACK; + + /* Disable ADDIP by default. */ + net->sctp.addip_enable = 0; + net->sctp.addip_noauth = 0; + net->sctp.default_auto_asconf = 0; + + /* Enable PR-SCTP by default. */ + net->sctp.prsctp_enable = 1; + + /* Disable AUTH by default. */ + net->sctp.auth_enable = 0; + + /* Set SCOPE policy to enabled */ + net->sctp.scope_policy = SCTP_SCOPE_POLICY_ENABLE; + + /* Set the default rwnd update threshold */ + net->sctp.rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT; + + /* Initialize maximum autoclose timeout. */ + net->sctp.max_autoclose = INT_MAX / HZ; + status = sctp_sysctl_net_register(net); if (status) goto err_sysctl_register; @@ -1272,59 +1336,12 @@ SCTP_STATIC __init int sctp_init(void) if (status) goto err_percpu_counter_init; - /* - * 14. Suggested SCTP Protocol Parameter Values - */ - /* The following protocol parameters are RECOMMENDED: */ - /* RTO.Initial - 3 seconds */ - sctp_rto_initial = SCTP_RTO_INITIAL; - /* RTO.Min - 1 second */ - sctp_rto_min = SCTP_RTO_MIN; - /* RTO.Max - 60 seconds */ - sctp_rto_max = SCTP_RTO_MAX; - /* RTO.Alpha - 1/8 */ - sctp_rto_alpha = SCTP_RTO_ALPHA; - /* RTO.Beta - 1/4 */ - sctp_rto_beta = SCTP_RTO_BETA; - - /* Valid.Cookie.Life - 60 seconds */ - sctp_valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE; - - /* Whether Cookie Preservative is enabled(1) or not(0) */ - sctp_cookie_preserve_enable = 1; - - /* Max.Burst - 4 */ - sctp_max_burst = SCTP_DEFAULT_MAX_BURST; - - /* Association.Max.Retrans - 10 attempts - * Path.Max.Retrans - 5 attempts (per destination address) - * Max.Init.Retransmits - 8 attempts - */ - sctp_max_retrans_association = 10; - sctp_max_retrans_path = 5; - sctp_max_retrans_init = 8; - - /* Sendbuffer growth - do per-socket accounting */ - sctp_sndbuf_policy = 0; - - /* Rcvbuffer growth - do per-socket accounting */ - sctp_rcvbuf_policy = 0; - - /* HB.interval - 30 seconds */ - sctp_hb_interval = SCTP_DEFAULT_TIMEOUT_HEARTBEAT; - - /* delayed SACK timeout */ - sctp_sack_timeout = SCTP_DEFAULT_TIMEOUT_SACK; - /* Implementation specific variables. */ /* Initialize default stream count setup information. */ sctp_max_instreams = SCTP_DEFAULT_INSTREAMS; sctp_max_outstreams = SCTP_DEFAULT_OUTSTREAMS; - /* Initialize maximum autoclose timeout. */ - sctp_max_autoclose = INT_MAX / HZ; - /* Initialize handle used for association ids. */ idr_init(&sctp_assocs_id); @@ -1411,23 +1428,6 @@ SCTP_STATIC __init int sctp_init(void) pr_info("Hash tables configured (established %d bind %d)\n", sctp_assoc_hashsize, sctp_port_hashsize); - /* Disable ADDIP by default. */ - sctp_addip_enable = 0; - sctp_addip_noauth = 0; - sctp_default_auto_asconf = 0; - - /* Enable PR-SCTP by default. */ - sctp_prsctp_enable = 1; - - /* Disable AUTH by default. */ - sctp_auth_enable = 0; - - /* Set SCOPE policy to enabled */ - sctp_scope_policy = SCTP_SCOPE_POLICY_ENABLE; - - /* Set the default rwnd update threshold */ - sctp_rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT; - sctp_sysctl_register(); INIT_LIST_HEAD(&sctp_address_families); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index a4b096f85a68..fbe1636309a7 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -198,6 +198,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, const struct sctp_bind_addr *bp, gfp_t gfp, int vparam_len) { + struct net *net = sock_net(asoc->base.sk); sctp_inithdr_t init; union sctp_params addrs; size_t chunksize; @@ -237,7 +238,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types)); chunksize += sizeof(ecap_param); - if (sctp_prsctp_enable) + if (net->sctp.prsctp_enable) chunksize += sizeof(prsctp_param); /* ADDIP: Section 4.2.7: @@ -245,7 +246,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, * the ASCONF,the ASCONF-ACK, and the AUTH chunks in its INIT and * INIT-ACK parameters. */ - if (sctp_addip_enable) { + if (net->sctp.addip_enable) { extensions[num_ext] = SCTP_CID_ASCONF; extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; num_ext += 2; @@ -257,7 +258,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize += vparam_len; /* Account for AUTH related parameters */ - if (sctp_auth_enable) { + if (net->sctp.auth_enable) { /* Add random parameter length*/ chunksize += sizeof(asoc->c.auth_random); @@ -331,7 +332,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sctp_addto_param(retval, num_ext, extensions); } - if (sctp_prsctp_enable) + if (net->sctp.prsctp_enable) sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); if (sp->adaptation_ind) { @@ -342,7 +343,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, } /* Add SCTP-AUTH chunks to the parameter list */ - if (sctp_auth_enable) { + if (net->sctp.auth_enable) { sctp_addto_chunk(retval, sizeof(asoc->c.auth_random), asoc->c.auth_random); if (auth_hmacs) @@ -1964,10 +1965,10 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param) * only if ADD-IP is turned on and we are not backward-compatible * mode. */ - if (sctp_addip_noauth) + if (net->sctp.addip_noauth) return 1; - if (sctp_addip_enable && !have_auth && have_asconf) + if (net->sctp.addip_enable && !have_auth && have_asconf) return 0; return 1; @@ -1976,13 +1977,14 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param) static void sctp_process_ext_param(struct sctp_association *asoc, union sctp_params param) { + struct net *net = sock_net(asoc->base.sk); __u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t); int i; for (i = 0; i < num_ext; i++) { switch (param.ext->chunks[i]) { case SCTP_CID_FWD_TSN: - if (sctp_prsctp_enable && + if (net->sctp.prsctp_enable && !asoc->peer.prsctp_capable) asoc->peer.prsctp_capable = 1; break; @@ -1990,12 +1992,12 @@ static void sctp_process_ext_param(struct sctp_association *asoc, /* if the peer reports AUTH, assume that he * supports AUTH. */ - if (sctp_auth_enable) + if (net->sctp.auth_enable) asoc->peer.auth_capable = 1; break; case SCTP_CID_ASCONF: case SCTP_CID_ASCONF_ACK: - if (sctp_addip_enable) + if (net->sctp.addip_enable) asoc->peer.asconf_capable = 1; break; default: @@ -2116,7 +2118,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; case SCTP_PARAM_SET_PRIMARY: - if (sctp_addip_enable) + if (net->sctp.addip_enable) break; goto fallthrough; @@ -2127,12 +2129,12 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; case SCTP_PARAM_FWD_TSN_SUPPORT: - if (sctp_prsctp_enable) + if (net->sctp.prsctp_enable) break; goto fallthrough; case SCTP_PARAM_RANDOM: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fallthrough; /* SCTP-AUTH: Secion 6.1 @@ -2149,7 +2151,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; case SCTP_PARAM_CHUNKS: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fallthrough; /* SCTP-AUTH: Section 3.2 @@ -2165,7 +2167,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; case SCTP_PARAM_HMAC_ALGO: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fallthrough; hmacs = (struct sctp_hmac_algo_param *)param.p; @@ -2271,6 +2273,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, const union sctp_addr *peer_addr, sctp_init_chunk_t *peer_init, gfp_t gfp) { + struct net *net = sock_net(asoc->base.sk); union sctp_params param; struct sctp_transport *transport; struct list_head *pos, *temp; @@ -2327,7 +2330,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, * also give us an option to silently ignore the packet, which * is what we'll do here. */ - if (!sctp_addip_noauth && + if (!net->sctp.addip_noauth && (asoc->peer.asconf_capable && !asoc->peer.auth_capable)) { asoc->peer.addip_disabled_mask |= (SCTP_PARAM_ADD_IP | SCTP_PARAM_DEL_IP | @@ -2502,7 +2505,7 @@ do_addr_param: break; case SCTP_PARAM_COOKIE_PRESERVATIVE: - if (!sctp_cookie_preserve_enable) + if (!net->sctp.cookie_preserve_enable) break; stale = ntohl(param.life->lifespan_increment); @@ -2582,7 +2585,7 @@ do_addr_param: break; case SCTP_PARAM_SET_PRIMARY: - if (!sctp_addip_enable) + if (!net->sctp.addip_enable) goto fall_through; addr_param = param.v + sizeof(sctp_addip_param_t); @@ -2609,7 +2612,7 @@ do_addr_param: break; case SCTP_PARAM_FWD_TSN_SUPPORT: - if (sctp_prsctp_enable) { + if (net->sctp.prsctp_enable) { asoc->peer.prsctp_capable = 1; break; } @@ -2617,7 +2620,7 @@ do_addr_param: goto fall_through; case SCTP_PARAM_RANDOM: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fall_through; /* Save peer's random parameter */ @@ -2630,7 +2633,7 @@ do_addr_param: break; case SCTP_PARAM_HMAC_ALGO: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fall_through; /* Save peer's HMAC list */ @@ -2646,7 +2649,7 @@ do_addr_param: break; case SCTP_PARAM_CHUNKS: - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) goto fall_through; asoc->peer.peer_chunks = kmemdup(param.p, diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index e17ada47afc4..094813b6c3c3 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3586,7 +3586,7 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net, * is received unauthenticated it MUST be silently discarded as * described in [I-D.ietf-tsvwg-sctp-auth]. */ - if (!sctp_addip_noauth && !chunk->auth) + if (!net->sctp.addip_noauth && !chunk->auth) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the ASCONF ADDIP chunk has a valid length. */ @@ -3713,7 +3713,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, * is received unauthenticated it MUST be silently discarded as * described in [I-D.ietf-tsvwg-sctp-auth]. */ - if (!sctp_addip_noauth && !asconf_ack->auth) + if (!net->sctp.addip_noauth && !asconf_ack->auth) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the ADDIP chunk has a valid length. */ diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 4a029d798287..84d98d8a5a74 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -918,12 +918,12 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, if (cid <= SCTP_CID_BASE_MAX) return &chunk_event_table[cid][state]; - if (sctp_prsctp_enable) { + if (net->sctp.prsctp_enable) { if (cid == SCTP_CID_FWD_TSN) return &prsctp_chunk_event_table[0][state]; } - if (sctp_addip_enable) { + if (net->sctp.addip_enable) { if (cid == SCTP_CID_ASCONF) return &addip_chunk_event_table[0][state]; @@ -931,7 +931,7 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, return &addip_chunk_event_table[1][state]; } - if (sctp_auth_enable) { + if (net->sctp.auth_enable) { if (cid == SCTP_CID_AUTH) return &auth_chunk_event_table[0][state]; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a6a4226a922f..d37d24ff197f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -516,6 +516,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, struct sockaddr *addrs, int addrcnt) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; @@ -530,7 +531,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, int i; int retval = 0; - if (!sctp_addip_enable) + if (!net->sctp.addip_enable) return retval; sp = sctp_sk(sk); @@ -718,6 +719,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk, struct sockaddr *addrs, int addrcnt) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; @@ -733,7 +735,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk, int stored = 0; chunk = NULL; - if (!sctp_addip_enable) + if (!net->sctp.addip_enable) return retval; sp = sctp_sk(sk); @@ -3039,6 +3041,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_association *asoc = NULL; struct sctp_setpeerprim prim; @@ -3048,7 +3051,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva sp = sctp_sk(sk); - if (!sctp_addip_enable) + if (!net->sctp.addip_enable) return -EPERM; if (optlen != sizeof(struct sctp_setpeerprim)) @@ -3285,9 +3288,10 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_authchunk val; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authchunk)) @@ -3317,11 +3321,12 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_hmacalgo *hmacs; u32 idents; int err; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen < sizeof(struct sctp_hmacalgo)) @@ -3354,11 +3359,12 @@ static int sctp_setsockopt_auth_key(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_authkey *authkey; struct sctp_association *asoc; int ret; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen <= sizeof(struct sctp_authkey)) @@ -3395,10 +3401,11 @@ static int sctp_setsockopt_active_key(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_authkeyid val; struct sctp_association *asoc; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authkeyid)) @@ -3423,10 +3430,11 @@ static int sctp_setsockopt_del_key(struct sock *sk, char __user *optval, unsigned int optlen) { + struct net *net = sock_net(sk); struct sctp_authkeyid val; struct sctp_association *asoc; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (optlen != sizeof(struct sctp_authkeyid)) @@ -3849,6 +3857,7 @@ out: */ SCTP_STATIC int sctp_init_sock(struct sock *sk) { + struct net *net = sock_net(sk); struct sctp_endpoint *ep; struct sctp_sock *sp; @@ -3878,7 +3887,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->default_timetolive = 0; sp->default_rcv_context = 0; - sp->max_burst = sctp_max_burst; + sp->max_burst = net->sctp.max_burst; /* Initialize default setup parameters. These parameters * can be modified with the SCTP_INITMSG socket option or @@ -3886,24 +3895,24 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) */ sp->initmsg.sinit_num_ostreams = sctp_max_outstreams; sp->initmsg.sinit_max_instreams = sctp_max_instreams; - sp->initmsg.sinit_max_attempts = sctp_max_retrans_init; - sp->initmsg.sinit_max_init_timeo = sctp_rto_max; + sp->initmsg.sinit_max_attempts = net->sctp.max_retrans_init; + sp->initmsg.sinit_max_init_timeo = net->sctp.rto_max; /* Initialize default RTO related parameters. These parameters can * be modified for with the SCTP_RTOINFO socket option. */ - sp->rtoinfo.srto_initial = sctp_rto_initial; - sp->rtoinfo.srto_max = sctp_rto_max; - sp->rtoinfo.srto_min = sctp_rto_min; + sp->rtoinfo.srto_initial = net->sctp.rto_initial; + sp->rtoinfo.srto_max = net->sctp.rto_max; + sp->rtoinfo.srto_min = net->sctp.rto_min; /* Initialize default association related parameters. These parameters * can be modified with the SCTP_ASSOCINFO socket option. */ - sp->assocparams.sasoc_asocmaxrxt = sctp_max_retrans_association; + sp->assocparams.sasoc_asocmaxrxt = net->sctp.max_retrans_association; sp->assocparams.sasoc_number_peer_destinations = 0; sp->assocparams.sasoc_peer_rwnd = 0; sp->assocparams.sasoc_local_rwnd = 0; - sp->assocparams.sasoc_cookie_life = sctp_valid_cookie_life; + sp->assocparams.sasoc_cookie_life = net->sctp.valid_cookie_life; /* Initialize default event subscriptions. By default, all the * options are off. @@ -3913,10 +3922,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) /* Default Peer Address Parameters. These defaults can * be modified via SCTP_PEER_ADDR_PARAMS */ - sp->hbinterval = sctp_hb_interval; - sp->pathmaxrxt = sctp_max_retrans_path; + sp->hbinterval = net->sctp.hb_interval; + sp->pathmaxrxt = net->sctp.max_retrans_path; sp->pathmtu = 0; // allow default discovery - sp->sackdelay = sctp_sack_timeout; + sp->sackdelay = net->sctp.sack_timeout; sp->sackfreq = 2; sp->param_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE | @@ -3967,10 +3976,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) local_bh_disable(); percpu_counter_inc(&sctp_sockets_allocated); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - if (sctp_default_auto_asconf) { + sock_prot_inuse_add(net, sk->sk_prot, 1); + if (net->sctp.default_auto_asconf) { list_add_tail(&sp->auto_asconf_list, - &sock_net(sk)->sctp.auto_asconf_splist); + &net->sctp.auto_asconf_splist); sp->do_auto_asconf = 1; } else sp->do_auto_asconf = 0; @@ -5307,12 +5316,13 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, char __user *optval, int __user *optlen) { + struct net *net = sock_net(sk); struct sctp_hmacalgo __user *p = (void __user *)optval; struct sctp_hmac_algo_param *hmacs; __u16 data_len = 0; u32 num_idents; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; hmacs = sctp_sk(sk)->ep->auth_hmacs_list; @@ -5336,10 +5346,11 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, static int sctp_getsockopt_active_key(struct sock *sk, int len, char __user *optval, int __user *optlen) { + struct net *net = sock_net(sk); struct sctp_authkeyid val; struct sctp_association *asoc; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (len < sizeof(struct sctp_authkeyid)) @@ -5368,6 +5379,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { + struct net *net = sock_net(sk); struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; @@ -5375,7 +5387,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (len < sizeof(struct sctp_authchunks)) @@ -5411,6 +5423,7 @@ num: static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { + struct net *net = sock_net(sk); struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; @@ -5418,7 +5431,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!sctp_auth_enable) + if (!net->sctp.auth_enable) return -EACCES; if (len < sizeof(struct sctp_authchunks)) diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index bee36c408dde..70e3ba5cb50b 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -63,9 +63,35 @@ extern int sysctl_sctp_rmem[3]; extern int sysctl_sctp_wmem[3]; static ctl_table sctp_table[] = { + { + .procname = "sctp_mem", + .data = &sysctl_sctp_mem, + .maxlen = sizeof(sysctl_sctp_mem), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax + }, + { + .procname = "sctp_rmem", + .data = &sysctl_sctp_rmem, + .maxlen = sizeof(sysctl_sctp_rmem), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sctp_wmem", + .data = &sysctl_sctp_wmem, + .maxlen = sizeof(sysctl_sctp_wmem), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + + { /* sentinel */ } +}; + +static ctl_table sctp_net_table[] = { { .procname = "rto_initial", - .data = &sctp_rto_initial, + .data = &init_net.sctp.rto_initial, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -74,7 +100,7 @@ static ctl_table sctp_table[] = { }, { .procname = "rto_min", - .data = &sctp_rto_min, + .data = &init_net.sctp.rto_min, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -83,7 +109,7 @@ static ctl_table sctp_table[] = { }, { .procname = "rto_max", - .data = &sctp_rto_max, + .data = &init_net.sctp.rto_max, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -91,17 +117,22 @@ static ctl_table sctp_table[] = { .extra2 = &timer_max }, { - .procname = "valid_cookie_life", - .data = &sctp_valid_cookie_life, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &timer_max + .procname = "rto_alpha_exp_divisor", + .data = &init_net.sctp.rto_alpha, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, + { + .procname = "rto_beta_exp_divisor", + .data = &init_net.sctp.rto_beta, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, }, { .procname = "max_burst", - .data = &sctp_max_burst, + .data = &init_net.sctp.max_burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -109,31 +140,42 @@ static ctl_table sctp_table[] = { .extra2 = &int_max }, { - .procname = "association_max_retrans", - .data = &sctp_max_retrans_association, + .procname = "cookie_preserve_enable", + .data = &init_net.sctp.cookie_preserve_enable, .maxlen = sizeof(int), .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "valid_cookie_life", + .data = &init_net.sctp.valid_cookie_life, + .maxlen = sizeof(unsigned int), + .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &int_max + .extra1 = &one, + .extra2 = &timer_max }, { - .procname = "sndbuf_policy", - .data = &sctp_sndbuf_policy, + .procname = "sack_timeout", + .data = &init_net.sctp.sack_timeout, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &sack_timer_min, + .extra2 = &sack_timer_max, }, { - .procname = "rcvbuf_policy", - .data = &sctp_rcvbuf_policy, - .maxlen = sizeof(int), + .procname = "hb_interval", + .data = &init_net.sctp.hb_interval, + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &timer_max }, { - .procname = "path_max_retrans", - .data = &sctp_max_retrans_path, + .procname = "association_max_retrans", + .data = &init_net.sctp.max_retrans_association, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -141,17 +183,17 @@ static ctl_table sctp_table[] = { .extra2 = &int_max }, { - .procname = "pf_retrans", - .data = &sctp_pf_retrans, + .procname = "path_max_retrans", + .data = &init_net.sctp.max_retrans_path, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, + .extra1 = &one, .extra2 = &int_max }, { .procname = "max_init_retransmits", - .data = &sctp_max_retrans_init, + .data = &init_net.sctp.max_retrans_init, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -159,103 +201,66 @@ static ctl_table sctp_table[] = { .extra2 = &int_max }, { - .procname = "hb_interval", - .data = &sctp_hb_interval, - .maxlen = sizeof(unsigned int), + .procname = "pf_retrans", + .data = &init_net.sctp.pf_retrans, + .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &timer_max + .extra1 = &zero, + .extra2 = &int_max }, { - .procname = "cookie_preserve_enable", - .data = &sctp_cookie_preserve_enable, + .procname = "sndbuf_policy", + .data = &init_net.sctp.sndbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "rto_alpha_exp_divisor", - .data = &sctp_rto_alpha, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "rto_beta_exp_divisor", - .data = &sctp_rto_beta, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "addip_enable", - .data = &sctp_addip_enable, + .procname = "rcvbuf_policy", + .data = &init_net.sctp.rcvbuf_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "default_auto_asconf", - .data = &sctp_default_auto_asconf, + .data = &init_net.sctp.default_auto_asconf, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "prsctp_enable", - .data = &sctp_prsctp_enable, + .procname = "addip_enable", + .data = &init_net.sctp.addip_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "sack_timeout", - .data = &sctp_sack_timeout, + .procname = "addip_noauth_enable", + .data = &init_net.sctp.addip_noauth, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &sack_timer_min, - .extra2 = &sack_timer_max, - }, - { - .procname = "sctp_mem", - .data = &sysctl_sctp_mem, - .maxlen = sizeof(sysctl_sctp_mem), - .mode = 0644, - .proc_handler = proc_doulongvec_minmax - }, - { - .procname = "sctp_rmem", - .data = &sysctl_sctp_rmem, - .maxlen = sizeof(sysctl_sctp_rmem), - .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "sctp_wmem", - .data = &sysctl_sctp_wmem, - .maxlen = sizeof(sysctl_sctp_wmem), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "auth_enable", - .data = &sctp_auth_enable, + .procname = "prsctp_enable", + .data = &init_net.sctp.prsctp_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { - .procname = "addip_noauth_enable", - .data = &sctp_addip_noauth, + .procname = "auth_enable", + .data = &init_net.sctp.auth_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "addr_scope_policy", - .data = &sctp_scope_policy, + .data = &init_net.sctp.scope_policy, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -264,7 +269,7 @@ static ctl_table sctp_table[] = { }, { .procname = "rwnd_update_shift", - .data = &sctp_rwnd_upd_shift, + .data = &init_net.sctp.rwnd_upd_shift, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, @@ -273,7 +278,7 @@ static ctl_table sctp_table[] = { }, { .procname = "max_autoclose", - .data = &sctp_max_autoclose, + .data = &init_net.sctp.max_autoclose, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = &proc_doulongvec_minmax, @@ -284,18 +289,18 @@ static ctl_table sctp_table[] = { { /* sentinel */ } }; -static ctl_table sctp_net_table[] = { - { /* sentinel */ } -}; - int sctp_sysctl_net_register(struct net *net) { struct ctl_table *table; + int i; table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL); if (!table) return -ENOMEM; + for (i = 0; table[i].data; i++) + table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp; + net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table); return 0; } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index aada963c9d6b..953c21e4af97 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -77,7 +77,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net, * given destination transport address, set RTO to the protocol * parameter 'RTO.Initial'. */ - peer->rto = msecs_to_jiffies(sctp_rto_initial); + peer->rto = msecs_to_jiffies(net->sctp.rto_initial); peer->last_time_heard = jiffies; peer->last_time_ecne_reduced = jiffies; @@ -87,8 +87,8 @@ static struct sctp_transport *sctp_transport_init(struct net *net, SPP_SACKDELAY_ENABLE; /* Initialize the default path max_retrans. */ - peer->pathmaxrxt = sctp_max_retrans_path; - peer->pf_retrans = sctp_pf_retrans; + peer->pathmaxrxt = net->sctp.max_retrans_path; + peer->pf_retrans = net->sctp.pf_retrans; INIT_LIST_HEAD(&peer->transmitted); INIT_LIST_HEAD(&peer->send_ready); @@ -318,6 +318,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return); if (tp->rttvar || tp->srtt) { + struct net *net = sock_net(tp->asoc->base.sk); /* 6.3.1 C3) When a new RTT measurement R' is made, set * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'| * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R' @@ -329,10 +330,10 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) * For example, assuming the default value of RTO.Alpha of * 1/8, rto_alpha would be expressed as 3. */ - tp->rttvar = tp->rttvar - (tp->rttvar >> sctp_rto_beta) - + ((abs(tp->srtt - rtt)) >> sctp_rto_beta); - tp->srtt = tp->srtt - (tp->srtt >> sctp_rto_alpha) - + (rtt >> sctp_rto_alpha); + tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta) + + ((abs(tp->srtt - rtt)) >> net->sctp.rto_beta); + tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha) + + (rtt >> net->sctp.rto_alpha); } else { /* 6.3.1 C2) When the first RTT measurement R is made, set * SRTT <- R, RTTVAR <- R/2. -- cgit v1.2.3 From 0fa7fa98dbcc2789409ed24e885485e645803d7f Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 21 Aug 2012 01:06:47 +0000 Subject: packet: Protect packet sk list with mutex (v2) Change since v1: * Fixed inuse counters access spotted by Eric In patch eea68e2f (packet: Report socket mclist info via diag module) I've introduced a "scheduling in atomic" problem in packet diag module -- the socket list is traversed under rcu_read_lock() while performed under it sk mclist access requires rtnl lock (i.e. -- mutex) to be taken. [152363.820563] BUG: scheduling while atomic: crtools/12517/0x10000002 [152363.820573] 4 locks held by crtools/12517: [152363.820581] #0: (sock_diag_mutex){+.+.+.}, at: [] sock_diag_rcv+0x1f/0x3e [152363.820613] #1: (sock_diag_table_mutex){+.+.+.}, at: [] sock_diag_rcv_msg+0xdb/0x11a [152363.820644] #2: (nlk->cb_mutex){+.+.+.}, at: [] netlink_dump+0x23/0x1ab [152363.820693] #3: (rcu_read_lock){.+.+..}, at: [] packet_diag_dump+0x0/0x1af Similar thing was then re-introduced by further packet diag patches (fanount mutex and pgvec mutex for rings) :( Apart from being terribly sorry for the above, I propose to change the packet sk list protection from spinlock to mutex. This lock currently protects two modifications: * sklist * prot inuse counters The sklist modifications can be just reprotected with mutex since they already occur in a sleeping context. The inuse counters modifications are trickier -- the __this_cpu_-s are used inside, thus requiring the caller to handle the potential issues with contexts himself. Since packet sockets' counters are modified in two places only (packet_create and packet_release) we only need to protect the context from being preempted. BH disabling is not required in this case. Signed-off-by: Pavel Emelyanov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/packet.h | 2 +- net/packet/af_packet.c | 16 +++++++++++----- net/packet/diag.c | 6 +++--- 3 files changed, 15 insertions(+), 9 deletions(-) (limited to 'include/net/netns') diff --git a/include/net/netns/packet.h b/include/net/netns/packet.h index cb4e894c0f8d..4780b080a436 100644 --- a/include/net/netns/packet.h +++ b/include/net/netns/packet.h @@ -8,7 +8,7 @@ #include struct netns_packet { - spinlock_t sklist_lock; + struct mutex sklist_lock; struct hlist_head sklist; }; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index bfaa0a83f0eb..f220c5bdb71f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2317,10 +2317,13 @@ static int packet_release(struct socket *sock) net = sock_net(sk); po = pkt_sk(sk); - spin_lock_bh(&net->packet.sklist_lock); + mutex_lock(&net->packet.sklist_lock); sk_del_node_init_rcu(sk); + mutex_unlock(&net->packet.sklist_lock); + + preempt_disable(); sock_prot_inuse_add(net, sk->sk_prot, -1); - spin_unlock_bh(&net->packet.sklist_lock); + preempt_enable(); spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); @@ -2519,10 +2522,13 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, register_prot_hook(sk); } - spin_lock_bh(&net->packet.sklist_lock); + mutex_lock(&net->packet.sklist_lock); sk_add_node_rcu(sk, &net->packet.sklist); + mutex_unlock(&net->packet.sklist_lock); + + preempt_disable(); sock_prot_inuse_add(net, &packet_proto, 1); - spin_unlock_bh(&net->packet.sklist_lock); + preempt_enable(); return 0; out: @@ -3775,7 +3781,7 @@ static const struct file_operations packet_seq_fops = { static int __net_init packet_net_init(struct net *net) { - spin_lock_init(&net->packet.sklist_lock); + mutex_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) diff --git a/net/packet/diag.c b/net/packet/diag.c index bc33fbe8a5ef..39bce0d50df9 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -177,8 +177,8 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) net = sock_net(skb->sk); req = nlmsg_data(cb->nlh); - rcu_read_lock(); - sk_for_each_rcu(sk, node, &net->packet.sklist) { + mutex_lock(&net->packet.sklist_lock); + sk_for_each(sk, node, &net->packet.sklist) { if (!net_eq(sock_net(sk), net)) continue; if (num < s_num) @@ -192,7 +192,7 @@ next: num++; } done: - rcu_read_unlock(); + mutex_unlock(&net->packet.sklist_lock); cb->args[0] = num; return skb->len; -- cgit v1.2.3 From f63c45e0e63fd1bccb6d021fe4de20f82114a024 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Thu, 23 Aug 2012 02:55:41 +0000 Subject: packet: fix broken build. This patch fixes a broken build due to a missing header: ... CC net/ipv4/proc.o In file included from include/net/net_namespace.h:15, from net/ipv4/proc.c:35: include/net/netns/packet.h:11: error: field 'sklist_lock' has incomplete type ... The lock of netns_packet has been replaced by a recent patch to be a mutex instead of a spinlock, but we need to replace the header file to be linux/mutex.h instead of linux/spinlock.h as well. See commit 0fa7fa98dbcc2789409ed24e885485e645803d7f: packet: Protect packet sk list with mutex (v2) patch, Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- include/net/netns/packet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net/netns') diff --git a/include/net/netns/packet.h b/include/net/netns/packet.h index 4780b080a436..17ec2b95c062 100644 --- a/include/net/netns/packet.h +++ b/include/net/netns/packet.h @@ -5,7 +5,7 @@ #define __NETNS_PACKET_H__ #include -#include +#include struct netns_packet { struct mutex sklist_lock; -- cgit v1.2.3 From c7232c9979cba684c50b64c513c4a83c9aa70563 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:06 +0200 Subject: netfilter: add protocol independent NAT core Convert the IPv4 NAT implementation to a protocol independent core and address family specific modules. Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 14 +- include/linux/netfilter/nf_nat.h | 8 + include/linux/netfilter/nfnetlink_conntrack.h | 6 +- include/linux/netfilter_ipv4.h | 1 - include/net/netfilter/nf_conntrack_expect.h | 2 +- include/net/netfilter/nf_nat.h | 2 +- include/net/netfilter/nf_nat_core.h | 5 +- include/net/netfilter/nf_nat_l3proto.h | 47 ++ include/net/netfilter/nf_nat_l4proto.h | 71 ++ include/net/netfilter/nf_nat_protocol.h | 67 -- include/net/netfilter/nf_nat_rule.h | 15 - include/net/netns/conntrack.h | 4 + include/net/netns/ipv4.h | 2 - net/ipv4/netfilter.c | 37 -- net/ipv4/netfilter/Kconfig | 64 +- net/ipv4/netfilter/Makefile | 11 +- net/ipv4/netfilter/ipt_MASQUERADE.c | 15 +- net/ipv4/netfilter/ipt_NETMAP.c | 15 +- net/ipv4/netfilter/ipt_REDIRECT.c | 15 +- net/ipv4/netfilter/iptable_nat.c | 320 +++++++++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 - net/ipv4/netfilter/nf_nat_amanda.c | 1 - net/ipv4/netfilter/nf_nat_core.c | 763 ---------------------- net/ipv4/netfilter/nf_nat_ftp.c | 1 - net/ipv4/netfilter/nf_nat_h323.c | 23 +- net/ipv4/netfilter/nf_nat_helper.c | 461 ------------- net/ipv4/netfilter/nf_nat_irc.c | 1 - net/ipv4/netfilter/nf_nat_l3proto_ipv4.c | 281 ++++++++ net/ipv4/netfilter/nf_nat_pptp.c | 15 +- net/ipv4/netfilter/nf_nat_proto_common.c | 114 ---- net/ipv4/netfilter/nf_nat_proto_dccp.c | 106 --- net/ipv4/netfilter/nf_nat_proto_gre.c | 30 +- net/ipv4/netfilter/nf_nat_proto_icmp.c | 24 +- net/ipv4/netfilter/nf_nat_proto_sctp.c | 96 --- net/ipv4/netfilter/nf_nat_proto_tcp.c | 91 --- net/ipv4/netfilter/nf_nat_proto_udp.c | 82 --- net/ipv4/netfilter/nf_nat_proto_udplite.c | 98 --- net/ipv4/netfilter/nf_nat_proto_unknown.c | 52 -- net/ipv4/netfilter/nf_nat_rule.c | 214 ------- net/ipv4/netfilter/nf_nat_sip.c | 19 +- net/ipv4/netfilter/nf_nat_standalone.c | 326 ---------- net/ipv4/netfilter/nf_nat_tftp.c | 1 - net/netfilter/Kconfig | 24 + net/netfilter/Makefile | 11 + net/netfilter/core.c | 5 + net/netfilter/nf_conntrack_core.c | 6 + net/netfilter/nf_conntrack_netlink.c | 35 +- net/netfilter/nf_conntrack_proto_tcp.c | 8 +- net/netfilter/nf_conntrack_sip.c | 6 +- net/netfilter/nf_nat_core.c | 854 +++++++++++++++++++++++++ net/netfilter/nf_nat_helper.c | 435 +++++++++++++ net/netfilter/nf_nat_proto_common.c | 112 ++++ net/netfilter/nf_nat_proto_dccp.c | 116 ++++ net/netfilter/nf_nat_proto_sctp.c | 103 +++ net/netfilter/nf_nat_proto_tcp.c | 85 +++ net/netfilter/nf_nat_proto_udp.c | 76 +++ net/netfilter/nf_nat_proto_udplite.c | 106 +++ net/netfilter/nf_nat_proto_unknown.c | 54 ++ net/netfilter/xt_nat.c | 167 +++++ 59 files changed, 3042 insertions(+), 2687 deletions(-) create mode 100644 include/net/netfilter/nf_nat_l3proto.h create mode 100644 include/net/netfilter/nf_nat_l4proto.h delete mode 100644 include/net/netfilter/nf_nat_protocol.h delete mode 100644 include/net/netfilter/nf_nat_rule.h create mode 100644 net/ipv4/netfilter/iptable_nat.c delete mode 100644 net/ipv4/netfilter/nf_nat_core.c delete mode 100644 net/ipv4/netfilter/nf_nat_helper.c create mode 100644 net/ipv4/netfilter/nf_nat_l3proto_ipv4.c delete mode 100644 net/ipv4/netfilter/nf_nat_proto_common.c delete mode 100644 net/ipv4/netfilter/nf_nat_proto_dccp.c delete mode 100644 net/ipv4/netfilter/nf_nat_proto_sctp.c delete mode 100644 net/ipv4/netfilter/nf_nat_proto_tcp.c delete mode 100644 net/ipv4/netfilter/nf_nat_proto_udp.c delete mode 100644 net/ipv4/netfilter/nf_nat_proto_udplite.c delete mode 100644 net/ipv4/netfilter/nf_nat_proto_unknown.c delete mode 100644 net/ipv4/netfilter/nf_nat_rule.c delete mode 100644 net/ipv4/netfilter/nf_nat_standalone.c create mode 100644 net/netfilter/nf_nat_core.c create mode 100644 net/netfilter/nf_nat_helper.c create mode 100644 net/netfilter/nf_nat_proto_common.c create mode 100644 net/netfilter/nf_nat_proto_dccp.c create mode 100644 net/netfilter/nf_nat_proto_sctp.c create mode 100644 net/netfilter/nf_nat_proto_tcp.c create mode 100644 net/netfilter/nf_nat_proto_udp.c create mode 100644 net/netfilter/nf_nat_proto_udplite.c create mode 100644 net/netfilter/nf_nat_proto_unknown.c create mode 100644 net/netfilter/xt_nat.c (limited to 'include/net/netns') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index c613cf0d7884..1dcf2a38e51f 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -342,7 +342,7 @@ extern int nf_register_afinfo(const struct nf_afinfo *afinfo); extern void nf_unregister_afinfo(const struct nf_afinfo *afinfo); #include -extern void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); +extern void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); static inline void nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) @@ -350,13 +350,11 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) #ifdef CONFIG_NF_NAT_NEEDED void (*decodefn)(struct sk_buff *, struct flowi *); - if (family == AF_INET) { - rcu_read_lock(); - decodefn = rcu_dereference(ip_nat_decode_session); - if (decodefn) - decodefn(skb, fl); - rcu_read_unlock(); - } + rcu_read_lock(); + decodefn = rcu_dereference(nf_nat_decode_session_hook); + if (decodefn) + decodefn(skb, fl); + rcu_read_unlock(); #endif } diff --git a/include/linux/netfilter/nf_nat.h b/include/linux/netfilter/nf_nat.h index 8df2d13730b2..bf0cc373ffb6 100644 --- a/include/linux/netfilter/nf_nat.h +++ b/include/linux/netfilter/nf_nat.h @@ -22,4 +22,12 @@ struct nf_nat_ipv4_multi_range_compat { struct nf_nat_ipv4_range range[1]; }; +struct nf_nat_range { + unsigned int flags; + union nf_inet_addr min_addr; + union nf_inet_addr max_addr; + union nf_conntrack_man_proto min_proto; + union nf_conntrack_man_proto max_proto; +}; + #endif /* _NETFILTER_NF_NAT_H */ diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index f649f7423ca2..68920eab287c 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -142,8 +142,10 @@ enum ctattr_tstamp { enum ctattr_nat { CTA_NAT_UNSPEC, - CTA_NAT_MINIP, - CTA_NAT_MAXIP, + CTA_NAT_V4_MINIP, +#define CTA_NAT_MINIP CTA_NAT_V4_MINIP + CTA_NAT_V4_MAXIP, +#define CTA_NAT_MAXIP CTA_NAT_V4_MAXIP CTA_NAT_PROTO, __CTA_NAT_MAX }; diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index e2b12801378d..b962dfc695ae 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -79,7 +79,6 @@ enum nf_ip_hook_priorities { #ifdef __KERNEL__ extern int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type); -extern int ip_xfrm_me_harder(struct sk_buff *skb); extern __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); #endif /*__KERNEL__*/ diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 983f00263243..cc13f377a705 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -43,7 +43,7 @@ struct nf_conntrack_expect { unsigned int class; #ifdef CONFIG_NF_NAT_NEEDED - __be32 saved_ip; + union nf_inet_addr saved_addr; /* This is the original per-proto part, used to map the * expected connection the way the recipient expects. */ union nf_conntrack_man_proto saved_proto; diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index b4de990b55f1..1752f1339054 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -50,7 +50,7 @@ struct nf_conn_nat { /* Set up the info structure to map into this range. */ extern unsigned int nf_nat_setup_info(struct nf_conn *ct, - const struct nf_nat_ipv4_range *range, + const struct nf_nat_range *range, enum nf_nat_manip_type maniptype); /* Is this tuple already taken? (not by us)*/ diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h index b13d8d18d595..972e1e47ec79 100644 --- a/include/net/netfilter/nf_nat_core.h +++ b/include/net/netfilter/nf_nat_core.h @@ -12,10 +12,7 @@ extern unsigned int nf_nat_packet(struct nf_conn *ct, unsigned int hooknum, struct sk_buff *skb); -extern int nf_nat_icmp_reply_translation(struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff *skb); +extern int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family); static inline int nf_nat_initialized(struct nf_conn *ct, enum nf_nat_manip_type manip) diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h new file mode 100644 index 000000000000..beed96961fa7 --- /dev/null +++ b/include/net/netfilter/nf_nat_l3proto.h @@ -0,0 +1,47 @@ +#ifndef _NF_NAT_L3PROTO_H +#define _NF_NAT_L3PROTO_H + +struct nf_nat_l4proto; +struct nf_nat_l3proto { + u8 l3proto; + + bool (*in_range)(const struct nf_conntrack_tuple *t, + const struct nf_nat_range *range); + + u32 (*secure_port)(const struct nf_conntrack_tuple *t, __be16); + + bool (*manip_pkt)(struct sk_buff *skb, + unsigned int iphdroff, + const struct nf_nat_l4proto *l4proto, + const struct nf_conntrack_tuple *target, + enum nf_nat_manip_type maniptype); + + void (*csum_update)(struct sk_buff *skb, unsigned int iphdroff, + __sum16 *check, + const struct nf_conntrack_tuple *t, + enum nf_nat_manip_type maniptype); + + void (*csum_recalc)(struct sk_buff *skb, u8 proto, + void *data, __sum16 *check, + int datalen, int oldlen); + + void (*decode_session)(struct sk_buff *skb, + const struct nf_conn *ct, + enum ip_conntrack_dir dir, + unsigned long statusbit, + struct flowi *fl); + + int (*nlattr_to_range)(struct nlattr *tb[], + struct nf_nat_range *range); +}; + +extern int nf_nat_l3proto_register(const struct nf_nat_l3proto *); +extern void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *); +extern const struct nf_nat_l3proto *__nf_nat_l3proto_find(u8 l3proto); + +extern int nf_nat_icmp_reply_translation(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum); + +#endif /* _NF_NAT_L3PROTO_H */ diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h new file mode 100644 index 000000000000..1f0a4f018fcf --- /dev/null +++ b/include/net/netfilter/nf_nat_l4proto.h @@ -0,0 +1,71 @@ +/* Header for use in defining a given protocol. */ +#ifndef _NF_NAT_L4PROTO_H +#define _NF_NAT_L4PROTO_H +#include +#include + +struct nf_nat_range; +struct nf_nat_l3proto; + +struct nf_nat_l4proto { + /* Protocol number. */ + u8 l4proto; + + /* Translate a packet to the target according to manip type. + * Return true if succeeded. + */ + bool (*manip_pkt)(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype); + + /* Is the manipable part of the tuple between min and max incl? */ + bool (*in_range)(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max); + + /* Alter the per-proto part of the tuple (depending on + * maniptype), to give a unique tuple in the given range if + * possible. Per-protocol part of tuple is initialized to the + * incoming packet. + */ + void (*unique_tuple)(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct); + + int (*nlattr_to_range)(struct nlattr *tb[], + struct nf_nat_range *range); +}; + +/* Protocol registration. */ +extern int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto); +extern void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto); + +extern const struct nf_nat_l4proto *__nf_nat_l4proto_find(u8 l3proto, u8 l4proto); + +/* Built-in protocols. */ +extern const struct nf_nat_l4proto nf_nat_l4proto_tcp; +extern const struct nf_nat_l4proto nf_nat_l4proto_udp; +extern const struct nf_nat_l4proto nf_nat_l4proto_icmp; +extern const struct nf_nat_l4proto nf_nat_l4proto_unknown; + +extern bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max); + +extern void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct, + u16 *rover); + +extern int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range *range); + +#endif /*_NF_NAT_L4PROTO_H*/ diff --git a/include/net/netfilter/nf_nat_protocol.h b/include/net/netfilter/nf_nat_protocol.h deleted file mode 100644 index 7b0b51165f70..000000000000 --- a/include/net/netfilter/nf_nat_protocol.h +++ /dev/null @@ -1,67 +0,0 @@ -/* Header for use in defining a given protocol. */ -#ifndef _NF_NAT_PROTOCOL_H -#define _NF_NAT_PROTOCOL_H -#include -#include - -struct nf_nat_ipv4_range; - -struct nf_nat_protocol { - /* Protocol number. */ - unsigned int protonum; - - /* Translate a packet to the target according to manip type. - Return true if succeeded. */ - bool (*manip_pkt)(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype); - - /* Is the manipable part of the tuple between min and max incl? */ - bool (*in_range)(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max); - - /* Alter the per-proto part of the tuple (depending on - maniptype), to give a unique tuple in the given range if - possible. Per-protocol part of tuple is initialized to the - incoming packet. */ - void (*unique_tuple)(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct); - - int (*nlattr_to_range)(struct nlattr *tb[], - struct nf_nat_ipv4_range *range); -}; - -/* Protocol registration. */ -extern int nf_nat_protocol_register(const struct nf_nat_protocol *proto); -extern void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto); - -/* Built-in protocols. */ -extern const struct nf_nat_protocol nf_nat_protocol_tcp; -extern const struct nf_nat_protocol nf_nat_protocol_udp; -extern const struct nf_nat_protocol nf_nat_protocol_icmp; -extern const struct nf_nat_protocol nf_nat_unknown_protocol; - -extern int init_protocols(void) __init; -extern void cleanup_protocols(void); -extern const struct nf_nat_protocol *find_nat_proto(u_int16_t protonum); - -extern bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max); - -extern void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct, - u_int16_t *rover); - -extern int nf_nat_proto_nlattr_to_range(struct nlattr *tb[], - struct nf_nat_ipv4_range *range); - -#endif /*_NF_NAT_PROTO_H*/ diff --git a/include/net/netfilter/nf_nat_rule.h b/include/net/netfilter/nf_nat_rule.h deleted file mode 100644 index 2890bdc4cd92..000000000000 --- a/include/net/netfilter/nf_nat_rule.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _NF_NAT_RULE_H -#define _NF_NAT_RULE_H -#include -#include -#include - -extern int nf_nat_rule_init(void) __init; -extern void nf_nat_rule_cleanup(void); -extern int nf_nat_rule_find(struct sk_buff *skb, - unsigned int hooknum, - const struct net_device *in, - const struct net_device *out, - struct nf_conn *ct); - -#endif /* _NF_NAT_RULE_H */ diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 3aecdc7a84fb..a1d83cc8bf85 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -83,6 +83,10 @@ struct netns_ct { int sysctl_auto_assign_helper; bool auto_assign_helper_warned; struct nf_ip_net nf_ct_proto; +#ifdef CONFIG_NF_NAT_NEEDED + struct hlist_head *nat_bysource; + unsigned int nat_htable_size; +#endif #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_header; struct ctl_table_header *acct_sysctl_header; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 1474dd65c66f..ace280d19a20 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -51,8 +51,6 @@ struct netns_ipv4 { struct xt_table *iptable_security; #endif struct xt_table *nat_table; - struct hlist_head *nat_bysource; - unsigned int nat_htable_size; #endif int sysctl_icmp_echo_ignore_all; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index ed1b36783192..f1643c0c3587 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -72,43 +72,6 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) } EXPORT_SYMBOL(ip_route_me_harder); -#ifdef CONFIG_XFRM -int ip_xfrm_me_harder(struct sk_buff *skb) -{ - struct flowi fl; - unsigned int hh_len; - struct dst_entry *dst; - - if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) - return 0; - if (xfrm_decode_session(skb, &fl, AF_INET) < 0) - return -1; - - dst = skb_dst(skb); - if (dst->xfrm) - dst = ((struct xfrm_dst *)dst)->route; - dst_hold(dst); - - dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); - if (IS_ERR(dst)) - return -1; - - skb_dst_drop(skb); - skb_dst_set(skb, dst); - - /* Change in oif may mean change in hh_len. */ - hh_len = skb_dst(skb)->dev->hard_header_len; - if (skb_headroom(skb) < hh_len && - pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) - return -1; - return 0; -} -EXPORT_SYMBOL(ip_xfrm_me_harder); -#endif - -void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); -EXPORT_SYMBOL(ip_nat_decode_session); - /* * Extra routing may needed on local out, as the QUEUE target never * returns control to the table. diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index fcc543cd987a..b26629681bdb 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -143,25 +143,22 @@ config IP_NF_TARGET_ULOG To compile it as a module, choose M here. If unsure, say N. # NAT + specific targets: nf_conntrack -config NF_NAT - tristate "Full NAT" +config NF_NAT_IPV4 + tristate "IPv4 NAT" depends on NF_CONNTRACK_IPV4 default m if NETFILTER_ADVANCED=n + select NF_NAT help - The Full NAT option allows masquerading, port forwarding and other + The IPv4 NAT option allows masquerading, port forwarding and other forms of full Network Address Port Translation. It is controlled by the `nat' table in iptables: see the man page for iptables(8). To compile it as a module, choose M here. If unsure, say N. -config NF_NAT_NEEDED - bool - depends on NF_NAT - default y +if NF_NAT_IPV4 config IP_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" - depends on NF_NAT default m if NETFILTER_ADVANCED=n help Masquerading is a special case of NAT: all outgoing connections are @@ -174,7 +171,6 @@ config IP_NF_TARGET_MASQUERADE config IP_NF_TARGET_NETMAP tristate "NETMAP target support" - depends on NF_NAT depends on NETFILTER_ADVANCED help NETMAP is an implementation of static 1:1 NAT mapping of network @@ -185,7 +181,6 @@ config IP_NF_TARGET_NETMAP config IP_NF_TARGET_REDIRECT tristate "REDIRECT target support" - depends on NF_NAT depends on NETFILTER_ADVANCED help REDIRECT is a special case of NAT: all incoming connections are @@ -195,9 +190,11 @@ config IP_NF_TARGET_REDIRECT To compile it as a module, choose M here. If unsure, say N. +endif + config NF_NAT_SNMP_BASIC tristate "Basic SNMP-ALG support" - depends on NF_CONNTRACK_SNMP && NF_NAT + depends on NF_CONNTRACK_SNMP && NF_NAT_IPV4 depends on NETFILTER_ADVANCED default NF_NAT && NF_CONNTRACK_SNMP ---help--- @@ -219,61 +216,46 @@ config NF_NAT_SNMP_BASIC # '&&' (6) # # (6) Returns the result of min(/expr/, /expr/). -config NF_NAT_PROTO_DCCP - tristate - depends on NF_NAT && NF_CT_PROTO_DCCP - default NF_NAT && NF_CT_PROTO_DCCP config NF_NAT_PROTO_GRE tristate - depends on NF_NAT && NF_CT_PROTO_GRE - -config NF_NAT_PROTO_UDPLITE - tristate - depends on NF_NAT && NF_CT_PROTO_UDPLITE - default NF_NAT && NF_CT_PROTO_UDPLITE - -config NF_NAT_PROTO_SCTP - tristate - default NF_NAT && NF_CT_PROTO_SCTP - depends on NF_NAT && NF_CT_PROTO_SCTP - select LIBCRC32C + depends on NF_NAT_IPV4 && NF_CT_PROTO_GRE config NF_NAT_FTP tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_FTP + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_FTP config NF_NAT_IRC tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_IRC + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_IRC config NF_NAT_TFTP tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_TFTP + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_TFTP config NF_NAT_AMANDA tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_AMANDA + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_AMANDA config NF_NAT_PPTP tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_PPTP + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_PPTP select NF_NAT_PROTO_GRE config NF_NAT_H323 tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_H323 + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_H323 config NF_NAT_SIP tristate - depends on NF_CONNTRACK && NF_NAT - default NF_NAT && NF_CONNTRACK_SIP + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_SIP # mangle + specific targets config IP_NF_MANGLE diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index c20674dc9452..0ea3acc510e2 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -10,13 +10,11 @@ nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o endif endif -nf_nat-y := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o -iptable_nat-y := nf_nat_rule.o nf_nat_standalone.o - # connection tracking obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o -obj-$(CONFIG_NF_NAT) += nf_nat.o +nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o +obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o # defrag obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o @@ -32,10 +30,7 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o # NAT protocols (nf_nat) -obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o -obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o -obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o @@ -43,7 +38,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o # the three instances of ip_tables obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o -obj-$(CONFIG_NF_NAT) += iptable_nat.o +obj-$(CONFIG_NF_NAT_IPV4) += iptable_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index cbb6a1a6f6f7..1c3aa28b51ae 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -19,9 +19,9 @@ #include #include #include -#include #include #include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); @@ -49,7 +49,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) struct nf_conn *ct; struct nf_conn_nat *nat; enum ip_conntrack_info ctinfo; - struct nf_nat_ipv4_range newrange; + struct nf_nat_range newrange; const struct nf_nat_ipv4_multi_range_compat *mr; const struct rtable *rt; __be32 newsrc, nh; @@ -80,10 +80,13 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) nat->masq_index = par->out->ifindex; /* Transfer from original range. */ - newrange = ((struct nf_nat_ipv4_range) - { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, - newsrc, newsrc, - mr->range[0].min, mr->range[0].max }); + memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); + memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); + newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.ip = newsrc; + newrange.max_addr.ip = newsrc; + newrange.min_proto = mr->range[0].min; + newrange.max_proto = mr->range[0].max; /* Hand modified range to generic setup. */ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index b5bfbbabf70d..85028dc0425d 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Svenning Soerensen "); @@ -44,7 +44,7 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) enum ip_conntrack_info ctinfo; __be32 new_ip, netmask; const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - struct nf_nat_ipv4_range newrange; + struct nf_nat_range newrange; NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || par->hooknum == NF_INET_POST_ROUTING || @@ -61,10 +61,13 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) new_ip = ip_hdr(skb)->saddr & ~netmask; new_ip |= mr->range[0].min_ip & netmask; - newrange = ((struct nf_nat_ipv4_range) - { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, - new_ip, new_ip, - mr->range[0].min, mr->range[0].max }); + memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); + memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); + newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.ip = new_ip; + newrange.max_addr.ip = new_ip; + newrange.min_proto = mr->range[0].min; + newrange.max_proto = mr->range[0].max; /* Hand modified range to generic setup. */ return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 7c0103a5203e..11407d7d2472 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); @@ -48,7 +48,7 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) enum ip_conntrack_info ctinfo; __be32 newdst; const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - struct nf_nat_ipv4_range newrange; + struct nf_nat_range newrange; NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || par->hooknum == NF_INET_LOCAL_OUT); @@ -76,10 +76,13 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) } /* Transfer from original range. */ - newrange = ((struct nf_nat_ipv4_range) - { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, - newdst, newdst, - mr->range[0].min, mr->range[0].max }); + memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); + memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); + newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.ip = newdst; + newrange.max_addr.ip = newdst; + newrange.min_proto = mr->range[0].min; + newrange.max_proto = mr->range[0].max; /* Hand modified range to generic setup. */ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c new file mode 100644 index 000000000000..9e0ffaf1d942 --- /dev/null +++ b/net/ipv4/netfilter/iptable_nat.c @@ -0,0 +1,320 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * (C) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static const struct xt_table nf_nat_ipv4_table = { + .name = "nat", + .valid_hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + .af = NFPROTO_IPV4, +}; + +static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) +{ + /* Force range to this IP; let proto decide mapping for + * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). + */ + struct nf_nat_range range; + + range.flags = 0; + pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, + HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? + &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : + &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); + + return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); +} + +static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + struct nf_conn *ct) +{ + struct net *net = nf_ct_net(ct); + unsigned int ret; + + ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); + if (ret == NF_ACCEPT) { + if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) + ret = alloc_null_binding(ct, hooknum); + } + return ret; +} + +static unsigned int +nf_nat_ipv4_fn(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + struct nf_conn_nat *nat; + /* maniptype == SRC for postrouting. */ + enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + + /* We never see fragments: conntrack defrags on pre-routing + * and local-out, and nf_nat_out protects post-routing. + */ + NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); + + ct = nf_ct_get(skb, &ctinfo); + /* Can't track? It's not due to stress, or conntrack would + * have dropped it. Hence it's the user's responsibilty to + * packet filter it out, or implement conntrack/NAT for that + * protocol. 8) --RR + */ + if (!ct) + return NF_ACCEPT; + + /* Don't try to NAT if this packet is not conntracked */ + if (nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + nat = nfct_nat(ct); + if (!nat) { + /* NAT module was loaded late. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) { + pr_debug("failed to add NAT extension\n"); + return NF_ACCEPT; + } + } + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { + if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, + hooknum)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ + case IP_CT_NEW: + /* Seen it before? This can happen for loopback, retrans, + * or local packets. + */ + if (!nf_nat_initialized(ct, maniptype)) { + unsigned int ret; + + ret = nf_nat_rule_find(skb, hooknum, in, out, ct); + if (ret != NF_ACCEPT) + return ret; + } else + pr_debug("Already setup manip %s for ct %p\n", + maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", + ct); + break; + + default: + /* ESTABLISHED */ + NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || + ctinfo == IP_CT_ESTABLISHED_REPLY); + } + + return nf_nat_packet(ct, ctinfo, hooknum, skb); +} + +static unsigned int +nf_nat_ipv4_in(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + __be32 daddr = ip_hdr(skb)->daddr; + + ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + daddr != ip_hdr(skb)->daddr) + skb_dst_drop(skb); + + return ret; +} + +static unsigned int +nf_nat_ipv4_out(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ +#ifdef CONFIG_XFRM + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; +#endif + unsigned int ret; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if ((ct->tuplehash[dir].tuple.src.u3.ip != + ct->tuplehash[!dir].tuple.dst.u3.ip) || + (ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all)) + if (nf_xfrm_me_harder(skb, AF_INET) < 0) + ret = NF_DROP; + } +#endif + return ret; +} + +static unsigned int +nf_nat_ipv4_local_fn(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned int ret; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.dst.u3.ip != + ct->tuplehash[!dir].tuple.src.u3.ip) { + if (ip_route_me_harder(skb, RTN_UNSPEC)) + ret = NF_DROP; + } +#ifdef CONFIG_XFRM + else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && + ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) + if (nf_xfrm_me_harder(skb, AF_INET) < 0) + ret = NF_DROP; +#endif + } + return ret; +} + +static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv4_in, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv4_out, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_NAT_SRC, + }, + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv4_local_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv4_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_NAT_SRC, + }, +}; + +static int __net_init iptable_nat_net_init(struct net *net) +{ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&nf_nat_ipv4_table); + if (repl == NULL) + return -ENOMEM; + net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl); + kfree(repl); + if (IS_ERR(net->ipv4.nat_table)) + return PTR_ERR(net->ipv4.nat_table); + return 0; +} + +static void __net_exit iptable_nat_net_exit(struct net *net) +{ + ipt_unregister_table(net, net->ipv4.nat_table); +} + +static struct pernet_operations iptable_nat_net_ops = { + .init = iptable_nat_net_init, + .exit = iptable_nat_net_exit, +}; + +static int __init iptable_nat_init(void) +{ + int err; + + err = register_pernet_subsys(&iptable_nat_net_ops); + if (err < 0) + goto err1; + + err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + if (err < 0) + goto err2; + return 0; + +err2: + unregister_pernet_subsys(&iptable_nat_net_ops); +err1: + return err; +} + +static void __exit iptable_nat_exit(void) +{ + nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + unregister_pernet_subsys(&iptable_nat_net_ops); +} + +module_init(iptable_nat_init); +module_exit(iptable_nat_exit); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 4ada3295d9a7..fcdd0c2406e6 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -29,12 +29,6 @@ #include #include -int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff); -EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook); - static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 75464b62f5f2..42d337881171 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -16,7 +16,6 @@ #include #include #include -#include #include MODULE_AUTHOR("Brian J. Murrell "); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c deleted file mode 100644 index 44b082fd48ab..000000000000 --- a/net/ipv4/netfilter/nf_nat_core.c +++ /dev/null @@ -1,763 +0,0 @@ -/* NAT for netfilter; shared with compatibility layer. */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include /* For tcp_prot in getorigdst */ -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static DEFINE_SPINLOCK(nf_nat_lock); - -static struct nf_conntrack_l3proto *l3proto __read_mostly; - -#define MAX_IP_NAT_PROTO 256 -static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO] - __read_mostly; - -static inline const struct nf_nat_protocol * -__nf_nat_proto_find(u_int8_t protonum) -{ - return rcu_dereference(nf_nat_protos[protonum]); -} - -/* We keep an extra hash for each conntrack, for fast searching. */ -static inline unsigned int -hash_by_src(const struct net *net, u16 zone, - const struct nf_conntrack_tuple *tuple) -{ - unsigned int hash; - - /* Original src, to ensure we map it consistently if poss. */ - hash = jhash_3words((__force u32)tuple->src.u3.ip, - (__force u32)tuple->src.u.all ^ zone, - tuple->dst.protonum, nf_conntrack_hash_rnd); - return ((u64)hash * net->ipv4.nat_htable_size) >> 32; -} - -/* Is this tuple already taken? (not by us) */ -int -nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, - const struct nf_conn *ignored_conntrack) -{ - /* Conntrack tracking doesn't keep track of outgoing tuples; only - incoming ones. NAT means they don't have a fixed mapping, - so we invert the tuple and look for the incoming reply. - - We could keep a separate hash if this proves too slow. */ - struct nf_conntrack_tuple reply; - - nf_ct_invert_tuplepr(&reply, tuple); - return nf_conntrack_tuple_taken(&reply, ignored_conntrack); -} -EXPORT_SYMBOL(nf_nat_used_tuple); - -/* If we source map this tuple so reply looks like reply_tuple, will - * that meet the constraints of range. */ -static int -in_range(const struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range) -{ - const struct nf_nat_protocol *proto; - int ret = 0; - - /* If we are supposed to map IPs, then we must be in the - range specified, otherwise let this drag us onto a new src IP. */ - if (range->flags & NF_NAT_RANGE_MAP_IPS) { - if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) || - ntohl(tuple->src.u3.ip) > ntohl(range->max_ip)) - return 0; - } - - rcu_read_lock(); - proto = __nf_nat_proto_find(tuple->dst.protonum); - if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) || - proto->in_range(tuple, NF_NAT_MANIP_SRC, - &range->min, &range->max)) - ret = 1; - rcu_read_unlock(); - - return ret; -} - -static inline int -same_src(const struct nf_conn *ct, - const struct nf_conntrack_tuple *tuple) -{ - const struct nf_conntrack_tuple *t; - - t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - return (t->dst.protonum == tuple->dst.protonum && - t->src.u3.ip == tuple->src.u3.ip && - t->src.u.all == tuple->src.u.all); -} - -/* Only called for SRC manip */ -static int -find_appropriate_src(struct net *net, u16 zone, - const struct nf_conntrack_tuple *tuple, - struct nf_conntrack_tuple *result, - const struct nf_nat_ipv4_range *range) -{ - unsigned int h = hash_by_src(net, zone, tuple); - const struct nf_conn_nat *nat; - const struct nf_conn *ct; - const struct hlist_node *n; - - rcu_read_lock(); - hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { - ct = nat->ct; - if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { - /* Copy source part from reply tuple. */ - nf_ct_invert_tuplepr(result, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); - result->dst = tuple->dst; - - if (in_range(result, range)) { - rcu_read_unlock(); - return 1; - } - } - } - rcu_read_unlock(); - return 0; -} - -/* For [FUTURE] fragmentation handling, we want the least-used - src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus - if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports - 1-65535, we don't do pro-rata allocation based on ports; we choose - the ip with the lowest src-ip/dst-ip/proto usage. -*/ -static void -find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - const struct nf_conn *ct, - enum nf_nat_manip_type maniptype) -{ - __be32 *var_ipp; - /* Host order */ - u_int32_t minip, maxip, j; - - /* No IP mapping? Do nothing. */ - if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) - return; - - if (maniptype == NF_NAT_MANIP_SRC) - var_ipp = &tuple->src.u3.ip; - else - var_ipp = &tuple->dst.u3.ip; - - /* Fast path: only one choice. */ - if (range->min_ip == range->max_ip) { - *var_ipp = range->min_ip; - return; - } - - /* Hashing source and destination IPs gives a fairly even - * spread in practice (if there are a small number of IPs - * involved, there usually aren't that many connections - * anyway). The consistency means that servers see the same - * client coming from the same IP (some Internet Banking sites - * like this), even across reboots. */ - minip = ntohl(range->min_ip); - maxip = ntohl(range->max_ip); - j = jhash_2words((__force u32)tuple->src.u3.ip, - range->flags & NF_NAT_RANGE_PERSISTENT ? - 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0); - j = ((u64)j * (maxip - minip + 1)) >> 32; - *var_ipp = htonl(minip + j); -} - -/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING, - * we change the source to map into the range. For NF_INET_PRE_ROUTING - * and NF_INET_LOCAL_OUT, we change the destination to map into the - * range. It might not be possible to get a unique tuple, but we try. - * At worst (or if we race), we will end up with a final duplicate in - * __ip_conntrack_confirm and drop the packet. */ -static void -get_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_tuple *orig_tuple, - const struct nf_nat_ipv4_range *range, - struct nf_conn *ct, - enum nf_nat_manip_type maniptype) -{ - struct net *net = nf_ct_net(ct); - const struct nf_nat_protocol *proto; - u16 zone = nf_ct_zone(ct); - - /* 1) If this srcip/proto/src-proto-part is currently mapped, - and that same mapping gives a unique tuple within the given - range, use that. - - This is only required for source (ie. NAT/masq) mappings. - So far, we don't do local source mappings, so multiple - manips not an issue. */ - if (maniptype == NF_NAT_MANIP_SRC && - !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { - /* try the original tuple first */ - if (in_range(orig_tuple, range)) { - if (!nf_nat_used_tuple(orig_tuple, ct)) { - *tuple = *orig_tuple; - return; - } - } else if (find_appropriate_src(net, zone, orig_tuple, tuple, - range)) { - pr_debug("get_unique_tuple: Found current src map\n"); - if (!nf_nat_used_tuple(tuple, ct)) - return; - } - } - - /* 2) Select the least-used IP/proto combination in the given - range. */ - *tuple = *orig_tuple; - find_best_ips_proto(zone, tuple, range, ct, maniptype); - - /* 3) The per-protocol part of the manip is made to map into - the range to make a unique tuple. */ - - rcu_read_lock(); - proto = __nf_nat_proto_find(orig_tuple->dst.protonum); - - /* Only bother mapping if it's not already in range and unique */ - if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { - if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { - if (proto->in_range(tuple, maniptype, &range->min, - &range->max) && - (range->min.all == range->max.all || - !nf_nat_used_tuple(tuple, ct))) - goto out; - } else if (!nf_nat_used_tuple(tuple, ct)) { - goto out; - } - } - - /* Last change: get protocol to try to obtain unique tuple. */ - proto->unique_tuple(tuple, range, maniptype, ct); -out: - rcu_read_unlock(); -} - -unsigned int -nf_nat_setup_info(struct nf_conn *ct, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype) -{ - struct net *net = nf_ct_net(ct); - struct nf_conntrack_tuple curr_tuple, new_tuple; - struct nf_conn_nat *nat; - - /* nat helper or nfctnetlink also setup binding */ - nat = nfct_nat(ct); - if (!nat) { - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) { - pr_debug("failed to add NAT extension\n"); - return NF_ACCEPT; - } - } - - NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || - maniptype == NF_NAT_MANIP_DST); - BUG_ON(nf_nat_initialized(ct, maniptype)); - - /* What we've got will look like inverse of reply. Normally - this is what is in the conntrack, except for prior - manipulations (future optimization: if num_manips == 0, - orig_tp = - conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ - nf_ct_invert_tuplepr(&curr_tuple, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); - - get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype); - - if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) { - struct nf_conntrack_tuple reply; - - /* Alter conntrack table so will recognize replies. */ - nf_ct_invert_tuplepr(&reply, &new_tuple); - nf_conntrack_alter_reply(ct, &reply); - - /* Non-atomic: we own this at the moment. */ - if (maniptype == NF_NAT_MANIP_SRC) - ct->status |= IPS_SRC_NAT; - else - ct->status |= IPS_DST_NAT; - } - - if (maniptype == NF_NAT_MANIP_SRC) { - unsigned int srchash; - - srchash = hash_by_src(net, nf_ct_zone(ct), - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - spin_lock_bh(&nf_nat_lock); - /* nf_conntrack_alter_reply might re-allocate extension area */ - nat = nfct_nat(ct); - nat->ct = ct; - hlist_add_head_rcu(&nat->bysource, - &net->ipv4.nat_bysource[srchash]); - spin_unlock_bh(&nf_nat_lock); - } - - /* It's done. */ - if (maniptype == NF_NAT_MANIP_DST) - ct->status |= IPS_DST_NAT_DONE; - else - ct->status |= IPS_SRC_NAT_DONE; - - return NF_ACCEPT; -} -EXPORT_SYMBOL(nf_nat_setup_info); - -/* Returns true if succeeded. */ -static bool -manip_pkt(u_int16_t proto, - struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *target, - enum nf_nat_manip_type maniptype) -{ - struct iphdr *iph; - const struct nf_nat_protocol *p; - - if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) - return false; - - iph = (void *)skb->data + iphdroff; - - /* Manipulate protcol part. */ - - /* rcu_read_lock()ed by nf_hook_slow */ - p = __nf_nat_proto_find(proto); - if (!p->manip_pkt(skb, iphdroff, target, maniptype)) - return false; - - iph = (void *)skb->data + iphdroff; - - if (maniptype == NF_NAT_MANIP_SRC) { - csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); - iph->saddr = target->src.u3.ip; - } else { - csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); - iph->daddr = target->dst.u3.ip; - } - return true; -} - -/* Do packet manipulations according to nf_nat_setup_info. */ -unsigned int nf_nat_packet(struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff *skb) -{ - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - unsigned long statusbit; - enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); - - if (mtype == NF_NAT_MANIP_SRC) - statusbit = IPS_SRC_NAT; - else - statusbit = IPS_DST_NAT; - - /* Invert if this is reply dir. */ - if (dir == IP_CT_DIR_REPLY) - statusbit ^= IPS_NAT_MASK; - - /* Non-atomic: these bits don't change. */ - if (ct->status & statusbit) { - struct nf_conntrack_tuple target; - - /* We are aiming to look like inverse of other direction. */ - nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - - if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype)) - return NF_DROP; - } - return NF_ACCEPT; -} -EXPORT_SYMBOL_GPL(nf_nat_packet); - -/* Dir is direction ICMP is coming from (opposite to packet it contains) */ -int nf_nat_icmp_reply_translation(struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff *skb) -{ - struct { - struct icmphdr icmp; - struct iphdr ip; - } *inside; - struct nf_conntrack_tuple target; - int hdrlen = ip_hdrlen(skb); - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - unsigned long statusbit; - enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); - - if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) - return 0; - - inside = (void *)skb->data + hdrlen; - - /* We're actually going to mangle it beyond trivial checksum - adjustment, so make sure the current checksum is correct. */ - if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) - return 0; - - /* Must be RELATED */ - NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED || - skb->nfctinfo == IP_CT_RELATED_REPLY); - - /* Redirects on non-null nats must be dropped, else they'll - start talking to each other without our translation, and be - confused... --RR */ - if (inside->icmp.type == ICMP_REDIRECT) { - /* If NAT isn't finished, assume it and drop. */ - if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) - return 0; - - if (ct->status & IPS_NAT_MASK) - return 0; - } - - if (manip == NF_NAT_MANIP_SRC) - statusbit = IPS_SRC_NAT; - else - statusbit = IPS_DST_NAT; - - /* Invert if this is reply dir. */ - if (dir == IP_CT_DIR_REPLY) - statusbit ^= IPS_NAT_MASK; - - if (!(ct->status & statusbit)) - return 1; - - pr_debug("icmp_reply_translation: translating error %p manip %u " - "dir %s\n", skb, manip, - dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); - - /* Change inner back to look like incoming packet. We do the - opposite manip on this hook to normal, because it might not - pass all hooks (locally-generated ICMP). Consider incoming - packet: PREROUTING (DST manip), routing produces ICMP, goes - through POSTROUTING (which must correct the DST manip). */ - if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp), - &ct->tuplehash[!dir].tuple, !manip)) - return 0; - - if (skb->ip_summed != CHECKSUM_PARTIAL) { - /* Reloading "inside" here since manip_pkt inner. */ - inside = (void *)skb->data + hdrlen; - inside->icmp.checksum = 0; - inside->icmp.checksum = - csum_fold(skb_checksum(skb, hdrlen, - skb->len - hdrlen, 0)); - } - - /* Change outer to look the reply to an incoming packet - * (proto 0 means don't invert per-proto part). */ - nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - if (!manip_pkt(0, skb, 0, &target, manip)) - return 0; - - return 1; -} -EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); - -/* Protocol registration. */ -int nf_nat_protocol_register(const struct nf_nat_protocol *proto) -{ - int ret = 0; - - spin_lock_bh(&nf_nat_lock); - if (rcu_dereference_protected( - nf_nat_protos[proto->protonum], - lockdep_is_held(&nf_nat_lock) - ) != &nf_nat_unknown_protocol) { - ret = -EBUSY; - goto out; - } - RCU_INIT_POINTER(nf_nat_protos[proto->protonum], proto); - out: - spin_unlock_bh(&nf_nat_lock); - return ret; -} -EXPORT_SYMBOL(nf_nat_protocol_register); - -/* No one stores the protocol anywhere; simply delete it. */ -void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto) -{ - spin_lock_bh(&nf_nat_lock); - RCU_INIT_POINTER(nf_nat_protos[proto->protonum], - &nf_nat_unknown_protocol); - spin_unlock_bh(&nf_nat_lock); - synchronize_rcu(); -} -EXPORT_SYMBOL(nf_nat_protocol_unregister); - -/* No one using conntrack by the time this called. */ -static void nf_nat_cleanup_conntrack(struct nf_conn *ct) -{ - struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); - - if (nat == NULL || nat->ct == NULL) - return; - - NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE); - - spin_lock_bh(&nf_nat_lock); - hlist_del_rcu(&nat->bysource); - spin_unlock_bh(&nf_nat_lock); -} - -static void nf_nat_move_storage(void *new, void *old) -{ - struct nf_conn_nat *new_nat = new; - struct nf_conn_nat *old_nat = old; - struct nf_conn *ct = old_nat->ct; - - if (!ct || !(ct->status & IPS_SRC_NAT_DONE)) - return; - - spin_lock_bh(&nf_nat_lock); - hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); - spin_unlock_bh(&nf_nat_lock); -} - -static struct nf_ct_ext_type nat_extend __read_mostly = { - .len = sizeof(struct nf_conn_nat), - .align = __alignof__(struct nf_conn_nat), - .destroy = nf_nat_cleanup_conntrack, - .move = nf_nat_move_storage, - .id = NF_CT_EXT_NAT, - .flags = NF_CT_EXT_F_PREALLOC, -}; - -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - -#include -#include - -static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { - [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, - [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, -}; - -static int nfnetlink_parse_nat_proto(struct nlattr *attr, - const struct nf_conn *ct, - struct nf_nat_ipv4_range *range) -{ - struct nlattr *tb[CTA_PROTONAT_MAX+1]; - const struct nf_nat_protocol *npt; - int err; - - err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); - if (err < 0) - return err; - - rcu_read_lock(); - npt = __nf_nat_proto_find(nf_ct_protonum(ct)); - if (npt->nlattr_to_range) - err = npt->nlattr_to_range(tb, range); - rcu_read_unlock(); - return err; -} - -static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { - [CTA_NAT_MINIP] = { .type = NLA_U32 }, - [CTA_NAT_MAXIP] = { .type = NLA_U32 }, - [CTA_NAT_PROTO] = { .type = NLA_NESTED }, -}; - -static int -nfnetlink_parse_nat(const struct nlattr *nat, - const struct nf_conn *ct, struct nf_nat_ipv4_range *range) -{ - struct nlattr *tb[CTA_NAT_MAX+1]; - int err; - - memset(range, 0, sizeof(*range)); - - err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy); - if (err < 0) - return err; - - if (tb[CTA_NAT_MINIP]) - range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]); - - if (!tb[CTA_NAT_MAXIP]) - range->max_ip = range->min_ip; - else - range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]); - - if (range->min_ip) - range->flags |= NF_NAT_RANGE_MAP_IPS; - - if (!tb[CTA_NAT_PROTO]) - return 0; - - err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); - if (err < 0) - return err; - - return 0; -} - -static int -nfnetlink_parse_nat_setup(struct nf_conn *ct, - enum nf_nat_manip_type manip, - const struct nlattr *attr) -{ - struct nf_nat_ipv4_range range; - - if (nfnetlink_parse_nat(attr, ct, &range) < 0) - return -EINVAL; - if (nf_nat_initialized(ct, manip)) - return -EEXIST; - - return nf_nat_setup_info(ct, &range, manip); -} -#else -static int -nfnetlink_parse_nat_setup(struct nf_conn *ct, - enum nf_nat_manip_type manip, - const struct nlattr *attr) -{ - return -EOPNOTSUPP; -} -#endif - -static int __net_init nf_nat_net_init(struct net *net) -{ - /* Leave them the same for the moment. */ - net->ipv4.nat_htable_size = net->ct.htable_size; - net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0); - if (!net->ipv4.nat_bysource) - return -ENOMEM; - return 0; -} - -/* Clear NAT section of all conntracks, in case we're loaded again. */ -static int clean_nat(struct nf_conn *i, void *data) -{ - struct nf_conn_nat *nat = nfct_nat(i); - - if (!nat) - return 0; - memset(nat, 0, sizeof(*nat)); - i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); - return 0; -} - -static void __net_exit nf_nat_net_exit(struct net *net) -{ - nf_ct_iterate_cleanup(net, &clean_nat, NULL); - synchronize_rcu(); - nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size); -} - -static struct pernet_operations nf_nat_net_ops = { - .init = nf_nat_net_init, - .exit = nf_nat_net_exit, -}; - -static struct nf_ct_helper_expectfn follow_master_nat = { - .name = "nat-follow-master", - .expectfn = nf_nat_follow_master, -}; - -static struct nfq_ct_nat_hook nfq_ct_nat = { - .seq_adjust = nf_nat_tcp_seq_adjust, -}; - -static int __init nf_nat_init(void) -{ - size_t i; - int ret; - - need_ipv4_conntrack(); - - ret = nf_ct_extend_register(&nat_extend); - if (ret < 0) { - printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); - return ret; - } - - ret = register_pernet_subsys(&nf_nat_net_ops); - if (ret < 0) - goto cleanup_extend; - - /* Sew in builtin protocols. */ - spin_lock_bh(&nf_nat_lock); - for (i = 0; i < MAX_IP_NAT_PROTO; i++) - RCU_INIT_POINTER(nf_nat_protos[i], &nf_nat_unknown_protocol); - RCU_INIT_POINTER(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp); - RCU_INIT_POINTER(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp); - RCU_INIT_POINTER(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp); - spin_unlock_bh(&nf_nat_lock); - - /* Initialize fake conntrack so that NAT will skip it */ - nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); - - l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); - - nf_ct_helper_expectfn_register(&follow_master_nat); - - BUG_ON(nf_nat_seq_adjust_hook != NULL); - RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); - BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); - RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, - nfnetlink_parse_nat_setup); - BUG_ON(nf_ct_nat_offset != NULL); - RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset); - RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat); - return 0; - - cleanup_extend: - nf_ct_extend_unregister(&nat_extend); - return ret; -} - -static void __exit nf_nat_cleanup(void) -{ - unregister_pernet_subsys(&nf_nat_net_ops); - nf_ct_l3proto_put(l3proto); - nf_ct_extend_unregister(&nat_extend); - nf_ct_helper_expectfn_unregister(&follow_master_nat); - RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL); - RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); - RCU_INIT_POINTER(nf_ct_nat_offset, NULL); - RCU_INIT_POINTER(nfq_ct_nat_hook, NULL); - synchronize_net(); -} - -MODULE_LICENSE("GPL"); -MODULE_ALIAS("nf-nat-ipv4"); - -module_init(nf_nat_init); -module_exit(nf_nat_cleanup); diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index 5589f3af4a8e..dd5e387fc03b 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index d2c228db38b5..9c3db10b22d3 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -15,7 +15,6 @@ #include #include -#include #include #include #include @@ -392,7 +391,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, static void ip_nat_q931_expect(struct nf_conn *new, struct nf_conntrack_expect *this) { - struct nf_nat_ipv4_range range; + struct nf_nat_range range; if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */ nf_nat_follow_master(new, this); @@ -404,14 +403,15 @@ static void ip_nat_q931_expect(struct nf_conn *new, /* Change src to where master sends to */ range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; + range.min_addr = range.max_addr = + new->tuplehash[!this->dir].tuple.src.u3; nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); - range.min = range.max = this->saved_proto; - range.min_ip = range.max_ip = - new->master->tuplehash[!this->dir].tuple.src.u3.ip; + range.min_proto = range.max_proto = this->saved_proto; + range.min_addr = range.max_addr = + new->master->tuplehash[!this->dir].tuple.src.u3; nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); } @@ -490,20 +490,21 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, static void ip_nat_callforwarding_expect(struct nf_conn *new, struct nf_conntrack_expect *this) { - struct nf_nat_ipv4_range range; + struct nf_nat_range range; /* This must be a fresh one. */ BUG_ON(new->status & IPS_NAT_DONE_MASK); /* Change src to where master sends to */ range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; + range.min_addr = range.max_addr = + new->tuplehash[!this->dir].tuple.src.u3; nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); - range.min = range.max = this->saved_proto; - range.min_ip = range.max_ip = this->saved_ip; + range.min_proto = range.max_proto = this->saved_proto; + range.min_addr = range.max_addr = this->saved_addr; nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); } @@ -519,7 +520,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, u_int16_t nated_port; /* Set expectations for NAT */ - exp->saved_ip = exp->tuple.dst.u3.ip; + exp->saved_addr = exp->tuple.dst.u3; exp->tuple.dst.u3.ip = ct->tuplehash[!dir].tuple.dst.u3.ip; exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; exp->expectfn = ip_nat_callforwarding_expect; diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c deleted file mode 100644 index 2fefec5e757c..000000000000 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ /dev/null @@ -1,461 +0,0 @@ -/* ip_nat_helper.c - generic support functions for NAT helpers - * - * (C) 2000-2002 Harald Welte - * (C) 2003-2006 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DUMP_OFFSET(x) \ - pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \ - x->offset_before, x->offset_after, x->correction_pos); - -static DEFINE_SPINLOCK(nf_nat_seqofs_lock); - -/* Setup TCP sequence correction given this change at this sequence */ -static inline void -adjust_tcp_sequence(u32 seq, - int sizediff, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - struct nf_conn_nat *nat = nfct_nat(ct); - struct nf_nat_seq *this_way = &nat->seq[dir]; - - pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", - seq, sizediff); - - pr_debug("adjust_tcp_sequence: Seq_offset before: "); - DUMP_OFFSET(this_way); - - spin_lock_bh(&nf_nat_seqofs_lock); - - /* SYN adjust. If it's uninitialized, or this is after last - * correction, record it: we don't handle more than one - * adjustment in the window, but do deal with common case of a - * retransmit */ - if (this_way->offset_before == this_way->offset_after || - before(this_way->correction_pos, seq)) { - this_way->correction_pos = seq; - this_way->offset_before = this_way->offset_after; - this_way->offset_after += sizediff; - } - spin_unlock_bh(&nf_nat_seqofs_lock); - - pr_debug("adjust_tcp_sequence: Seq_offset after: "); - DUMP_OFFSET(this_way); -} - -/* Get the offset value, for conntrack */ -s16 nf_nat_get_offset(const struct nf_conn *ct, - enum ip_conntrack_dir dir, - u32 seq) -{ - struct nf_conn_nat *nat = nfct_nat(ct); - struct nf_nat_seq *this_way; - s16 offset; - - if (!nat) - return 0; - - this_way = &nat->seq[dir]; - spin_lock_bh(&nf_nat_seqofs_lock); - offset = after(seq, this_way->correction_pos) - ? this_way->offset_after : this_way->offset_before; - spin_unlock_bh(&nf_nat_seqofs_lock); - - return offset; -} -EXPORT_SYMBOL_GPL(nf_nat_get_offset); - -/* Frobs data inside this packet, which is linear. */ -static void mangle_contents(struct sk_buff *skb, - unsigned int dataoff, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len) -{ - unsigned char *data; - - BUG_ON(skb_is_nonlinear(skb)); - data = skb_network_header(skb) + dataoff; - - /* move post-replacement */ - memmove(data + match_offset + rep_len, - data + match_offset + match_len, - skb->tail - (skb->network_header + dataoff + - match_offset + match_len)); - - /* insert data from buffer */ - memcpy(data + match_offset, rep_buffer, rep_len); - - /* update skb info */ - if (rep_len > match_len) { - pr_debug("nf_nat_mangle_packet: Extending packet by " - "%u from %u bytes\n", rep_len - match_len, skb->len); - skb_put(skb, rep_len - match_len); - } else { - pr_debug("nf_nat_mangle_packet: Shrinking packet from " - "%u from %u bytes\n", match_len - rep_len, skb->len); - __skb_trim(skb, skb->len + rep_len - match_len); - } - - /* fix IP hdr checksum information */ - ip_hdr(skb)->tot_len = htons(skb->len); - ip_send_check(ip_hdr(skb)); -} - -/* Unusual, but possible case. */ -static int enlarge_skb(struct sk_buff *skb, unsigned int extra) -{ - if (skb->len + extra > 65535) - return 0; - - if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC)) - return 0; - - return 1; -} - -void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, - __be32 seq, s16 off) -{ - if (!off) - return; - set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); - adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo); - nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); -} -EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); - -void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, - u32 ctinfo, int off) -{ - const struct tcphdr *th; - - if (nf_ct_protonum(ct) != IPPROTO_TCP) - return; - - th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb)); - nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); -} -EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust); - -static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data, - int datalen, __sum16 *check, int oldlen) -{ - struct rtable *rt = skb_rtable(skb); - - if (skb->ip_summed != CHECKSUM_PARTIAL) { - if (!(rt->rt_flags & RTCF_LOCAL) && - (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_headroom(skb) + - skb_network_offset(skb) + - iph->ihl * 4; - skb->csum_offset = (void *)check - data; - *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - datalen, iph->protocol, 0); - } else { - *check = 0; - *check = csum_tcpudp_magic(iph->saddr, iph->daddr, - datalen, iph->protocol, - csum_partial(data, datalen, - 0)); - if (iph->protocol == IPPROTO_UDP && !*check) - *check = CSUM_MANGLED_0; - } - } else - inet_proto_csum_replace2(check, skb, - htons(oldlen), htons(datalen), 1); -} - -/* Generic function for mangling variable-length address changes inside - * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX - * command in FTP). - * - * Takes care about all the nasty sequence number changes, checksumming, - * skb enlargement, ... - * - * */ -int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len, bool adjust) -{ - struct iphdr *iph; - struct tcphdr *tcph; - int oldlen, datalen; - - if (!skb_make_writable(skb, skb->len)) - return 0; - - if (rep_len > match_len && - rep_len - match_len > skb_tailroom(skb) && - !enlarge_skb(skb, rep_len - match_len)) - return 0; - - SKB_LINEAR_ASSERT(skb); - - iph = ip_hdr(skb); - tcph = (void *)iph + iph->ihl*4; - - oldlen = skb->len - iph->ihl*4; - mangle_contents(skb, iph->ihl*4 + tcph->doff*4, - match_offset, match_len, rep_buffer, rep_len); - - datalen = skb->len - iph->ihl*4; - nf_nat_csum(skb, iph, tcph, datalen, &tcph->check, oldlen); - - if (adjust && rep_len != match_len) - nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq, - (int)rep_len - (int)match_len); - - return 1; -} -EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet); - -/* Generic function for mangling variable-length address changes inside - * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX - * command in the Amanda protocol) - * - * Takes care about all the nasty sequence number changes, checksumming, - * skb enlargement, ... - * - * XXX - This function could be merged with nf_nat_mangle_tcp_packet which - * should be fairly easy to do. - */ -int -nf_nat_mangle_udp_packet(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len) -{ - struct iphdr *iph; - struct udphdr *udph; - int datalen, oldlen; - - if (!skb_make_writable(skb, skb->len)) - return 0; - - if (rep_len > match_len && - rep_len - match_len > skb_tailroom(skb) && - !enlarge_skb(skb, rep_len - match_len)) - return 0; - - iph = ip_hdr(skb); - udph = (void *)iph + iph->ihl*4; - - oldlen = skb->len - iph->ihl*4; - mangle_contents(skb, iph->ihl*4 + sizeof(*udph), - match_offset, match_len, rep_buffer, rep_len); - - /* update the length of the UDP packet */ - datalen = skb->len - iph->ihl*4; - udph->len = htons(datalen); - - /* fix udp checksum if udp checksum was previously calculated */ - if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) - return 1; - - nf_nat_csum(skb, iph, udph, datalen, &udph->check, oldlen); - - return 1; -} -EXPORT_SYMBOL(nf_nat_mangle_udp_packet); - -/* Adjust one found SACK option including checksum correction */ -static void -sack_adjust(struct sk_buff *skb, - struct tcphdr *tcph, - unsigned int sackoff, - unsigned int sackend, - struct nf_nat_seq *natseq) -{ - while (sackoff < sackend) { - struct tcp_sack_block_wire *sack; - __be32 new_start_seq, new_end_seq; - - sack = (void *)skb->data + sackoff; - if (after(ntohl(sack->start_seq) - natseq->offset_before, - natseq->correction_pos)) - new_start_seq = htonl(ntohl(sack->start_seq) - - natseq->offset_after); - else - new_start_seq = htonl(ntohl(sack->start_seq) - - natseq->offset_before); - - if (after(ntohl(sack->end_seq) - natseq->offset_before, - natseq->correction_pos)) - new_end_seq = htonl(ntohl(sack->end_seq) - - natseq->offset_after); - else - new_end_seq = htonl(ntohl(sack->end_seq) - - natseq->offset_before); - - pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", - ntohl(sack->start_seq), new_start_seq, - ntohl(sack->end_seq), new_end_seq); - - inet_proto_csum_replace4(&tcph->check, skb, - sack->start_seq, new_start_seq, 0); - inet_proto_csum_replace4(&tcph->check, skb, - sack->end_seq, new_end_seq, 0); - sack->start_seq = new_start_seq; - sack->end_seq = new_end_seq; - sackoff += sizeof(*sack); - } -} - -/* TCP SACK sequence number adjustment */ -static inline unsigned int -nf_nat_sack_adjust(struct sk_buff *skb, - struct tcphdr *tcph, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) -{ - unsigned int dir, optoff, optend; - struct nf_conn_nat *nat = nfct_nat(ct); - - optoff = ip_hdrlen(skb) + sizeof(struct tcphdr); - optend = ip_hdrlen(skb) + tcph->doff * 4; - - if (!skb_make_writable(skb, optend)) - return 0; - - dir = CTINFO2DIR(ctinfo); - - while (optoff < optend) { - /* Usually: option, length. */ - unsigned char *op = skb->data + optoff; - - switch (op[0]) { - case TCPOPT_EOL: - return 1; - case TCPOPT_NOP: - optoff++; - continue; - default: - /* no partial options */ - if (optoff + 1 == optend || - optoff + op[1] > optend || - op[1] < 2) - return 0; - if (op[0] == TCPOPT_SACK && - op[1] >= 2+TCPOLEN_SACK_PERBLOCK && - ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) - sack_adjust(skb, tcph, optoff+2, - optoff+op[1], &nat->seq[!dir]); - optoff += op[1]; - } - } - return 1; -} - -/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ -int -nf_nat_seq_adjust(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int protoff) -{ - struct tcphdr *tcph; - int dir; - __be32 newseq, newack; - s16 seqoff, ackoff; - struct nf_conn_nat *nat = nfct_nat(ct); - struct nf_nat_seq *this_way, *other_way; - - dir = CTINFO2DIR(ctinfo); - - this_way = &nat->seq[dir]; - other_way = &nat->seq[!dir]; - - if (!skb_make_writable(skb, protoff + sizeof(*tcph))) - return 0; - - tcph = (void *)skb->data + protoff; - if (after(ntohl(tcph->seq), this_way->correction_pos)) - seqoff = this_way->offset_after; - else - seqoff = this_way->offset_before; - - if (after(ntohl(tcph->ack_seq) - other_way->offset_before, - other_way->correction_pos)) - ackoff = other_way->offset_after; - else - ackoff = other_way->offset_before; - - newseq = htonl(ntohl(tcph->seq) + seqoff); - newack = htonl(ntohl(tcph->ack_seq) - ackoff); - - inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); - inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); - - pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", - ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), - ntohl(newack)); - - tcph->seq = newseq; - tcph->ack_seq = newack; - - return nf_nat_sack_adjust(skb, tcph, ct, ctinfo); -} - -/* Setup NAT on this expected conntrack so it follows master. */ -/* If we fail to get a free NAT slot, we'll get dropped on confirm */ -void nf_nat_follow_master(struct nf_conn *ct, - struct nf_conntrack_expect *exp) -{ - struct nf_nat_ipv4_range range; - - /* This must be a fresh one. */ - BUG_ON(ct->status & IPS_NAT_DONE_MASK); - - /* Change src to where master sends to */ - range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; - nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); - - /* For DST manip, map port here to where it's expected. */ - range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); - range.min = range.max = exp->saved_proto; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; - nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); -} -EXPORT_SYMBOL(nf_nat_follow_master); diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index 5b0c20a1f08d..1ce37f89ec78 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -17,7 +17,6 @@ #include #include -#include #include #include #include diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c new file mode 100644 index 000000000000..d8b2e14efddc --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -0,0 +1,281 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * (C) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv4; + +#ifdef CONFIG_XFRM +static void nf_nat_ipv4_decode_session(struct sk_buff *skb, + const struct nf_conn *ct, + enum ip_conntrack_dir dir, + unsigned long statusbit, + struct flowi *fl) +{ + const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple; + struct flowi4 *fl4 = &fl->u.ip4; + + if (ct->status & statusbit) { + fl4->daddr = t->dst.u3.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl4->fl4_dport = t->dst.u.all; + } + + statusbit ^= IPS_NAT_MASK; + + if (ct->status & statusbit) { + fl4->saddr = t->src.u3.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl4->fl4_sport = t->src.u.all; + } +} +#endif /* CONFIG_XFRM */ + +static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t, + const struct nf_nat_range *range) +{ + return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) && + ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip); +} + +static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t, + __be16 dport) +{ + return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport); +} + +static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb, + unsigned int iphdroff, + const struct nf_nat_l4proto *l4proto, + const struct nf_conntrack_tuple *target, + enum nf_nat_manip_type maniptype) +{ + struct iphdr *iph; + unsigned int hdroff; + + if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) + return false; + + iph = (void *)skb->data + iphdroff; + hdroff = iphdroff + iph->ihl * 4; + + if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff, + target, maniptype)) + return false; + iph = (void *)skb->data + iphdroff; + + if (maniptype == NF_NAT_MANIP_SRC) { + csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); + iph->saddr = target->src.u3.ip; + } else { + csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); + iph->daddr = target->dst.u3.ip; + } + return true; +} + +static void nf_nat_ipv4_csum_update(struct sk_buff *skb, + unsigned int iphdroff, __sum16 *check, + const struct nf_conntrack_tuple *t, + enum nf_nat_manip_type maniptype) +{ + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); + __be32 oldip, newip; + + if (maniptype == NF_NAT_MANIP_SRC) { + oldip = iph->saddr; + newip = t->src.u3.ip; + } else { + oldip = iph->daddr; + newip = t->dst.u3.ip; + } + inet_proto_csum_replace4(check, skb, oldip, newip, 1); +} + +static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, + u8 proto, void *data, __sum16 *check, + int datalen, int oldlen) +{ + const struct iphdr *iph = ip_hdr(skb); + struct rtable *rt = skb_rtable(skb); + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + if (!(rt->rt_flags & RTCF_LOCAL) && + (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + + skb_network_offset(skb) + + ip_hdrlen(skb); + skb->csum_offset = (void *)check - data; + *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, + datalen, proto, 0); + } else { + *check = 0; + *check = csum_tcpudp_magic(iph->saddr, iph->daddr, + datalen, proto, + csum_partial(data, datalen, + 0)); + if (proto == IPPROTO_UDP && !*check) + *check = CSUM_MANGLED_0; + } + } else + inet_proto_csum_replace2(check, skb, + htons(oldlen), htons(datalen), 1); +} + +static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range *range) +{ + if (tb[CTA_NAT_V4_MINIP]) { + range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]); + range->flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (tb[CTA_NAT_V4_MAXIP]) + range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]); + else + range->max_addr.ip = range->min_addr.ip; + + return 0; +} + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = { + .l3proto = NFPROTO_IPV4, + .in_range = nf_nat_ipv4_in_range, + .secure_port = nf_nat_ipv4_secure_port, + .manip_pkt = nf_nat_ipv4_manip_pkt, + .csum_update = nf_nat_ipv4_csum_update, + .csum_recalc = nf_nat_ipv4_csum_recalc, + .nlattr_to_range = nf_nat_ipv4_nlattr_to_range, +#ifdef CONFIG_XFRM + .decode_session = nf_nat_ipv4_decode_session, +#endif +}; + +int nf_nat_icmp_reply_translation(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum) +{ + struct { + struct icmphdr icmp; + struct iphdr ip; + } *inside; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); + unsigned int hdrlen = ip_hdrlen(skb); + const struct nf_nat_l4proto *l4proto; + struct nf_conntrack_tuple target; + unsigned long statusbit; + + NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY); + + if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) + return 0; + if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) + return 0; + + inside = (void *)skb->data + hdrlen; + if (inside->icmp.type == ICMP_REDIRECT) { + if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) + return 0; + if (ct->status & IPS_NAT_MASK) + return 0; + } + + if (manip == NF_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; + + /* Invert if this is reply direction */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; + + if (!(ct->status & statusbit)) + return 1; + + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol); + if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp), + l4proto, &ct->tuplehash[!dir].tuple, !manip)) + return 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + /* Reloading "inside" here since manip_pkt may reallocate */ + inside = (void *)skb->data + hdrlen; + inside->icmp.checksum = 0; + inside->icmp.checksum = + csum_fold(skb_checksum(skb, hdrlen, + skb->len - hdrlen, 0)); + } + + /* Change outer to look like the reply to an incoming packet */ + nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0); + if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip)) + return 0; + + return 1; +} +EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); + +static int __init nf_nat_l3proto_ipv4_init(void) +{ + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp); + if (err < 0) + goto err1; + err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4); + if (err < 0) + goto err2; + return err; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp); +err1: + return err; +} + +static void __exit nf_nat_l3proto_ipv4_exit(void) +{ + nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4); + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp); +} + +MODULE_LICENSE("GPL"); +MODULE_ALIAS("nf-nat-" __stringify(AF_INET)); + +module_init(nf_nat_l3proto_ipv4_init); +module_exit(nf_nat_l3proto_ipv4_exit); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 31ef890d894b..a06d7d74817d 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -22,7 +22,6 @@ #include #include -#include #include #include #include @@ -47,7 +46,7 @@ static void pptp_nat_expected(struct nf_conn *ct, struct nf_conntrack_tuple t; const struct nf_ct_pptp_master *ct_pptp_info; const struct nf_nat_pptp *nat_pptp_info; - struct nf_nat_ipv4_range range; + struct nf_nat_range range; ct_pptp_info = nfct_help_data(master); nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; @@ -89,21 +88,21 @@ static void pptp_nat_expected(struct nf_conn *ct, /* Change src to where master sends to */ range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.dst.u3; if (exp->dir == IP_CT_DIR_ORIGINAL) { range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - range.min = range.max = exp->saved_proto; + range.min_proto = range.max_proto = exp->saved_proto; } nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.src.u3; if (exp->dir == IP_CT_DIR_REPLY) { range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - range.min = range.max = exp->saved_proto; + range.min_proto = range.max_proto = exp->saved_proto; } nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); } diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c deleted file mode 100644 index 9993bc93e102..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ /dev/null @@ -1,114 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * (C) 2008 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max) -{ - __be16 port; - - if (maniptype == NF_NAT_MANIP_SRC) - port = tuple->src.u.all; - else - port = tuple->dst.u.all; - - return ntohs(port) >= ntohs(min->all) && - ntohs(port) <= ntohs(max->all); -} -EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); - -void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct, - u_int16_t *rover) -{ - unsigned int range_size, min, i; - __be16 *portptr; - u_int16_t off; - - if (maniptype == NF_NAT_MANIP_SRC) - portptr = &tuple->src.u.all; - else - portptr = &tuple->dst.u.all; - - /* If no range specified... */ - if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { - /* If it's dst rewrite, can't change port */ - if (maniptype == NF_NAT_MANIP_DST) - return; - - if (ntohs(*portptr) < 1024) { - /* Loose convention: >> 512 is credential passing */ - if (ntohs(*portptr) < 512) { - min = 1; - range_size = 511 - min + 1; - } else { - min = 600; - range_size = 1023 - min + 1; - } - } else { - min = 1024; - range_size = 65535 - 1024 + 1; - } - } else { - min = ntohs(range->min.all); - range_size = ntohs(range->max.all) - min + 1; - } - - if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) - off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip, - maniptype == NF_NAT_MANIP_SRC - ? tuple->dst.u.all - : tuple->src.u.all); - else - off = *rover; - - for (i = 0; ; ++off) { - *portptr = htons(min + off % range_size); - if (++i != range_size && nf_nat_used_tuple(tuple, ct)) - continue; - if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) - *rover = off; - return; - } - return; -} -EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); - -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) -int nf_nat_proto_nlattr_to_range(struct nlattr *tb[], - struct nf_nat_ipv4_range *range) -{ - if (tb[CTA_PROTONAT_PORT_MIN]) { - range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); - range->max.all = range->min.tcp.port; - range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - if (tb[CTA_PROTONAT_PORT_MAX]) { - range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); - range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; - } - return 0; -} -EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range); -#endif diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c deleted file mode 100644 index 3f67138d187c..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_dccp.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * DCCP NAT protocol helper - * - * Copyright (c) 2005, 2006. 2008 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static u_int16_t dccp_port_rover; - -static void -dccp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &dccp_port_rover); -} - -static bool -dccp_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - const struct iphdr *iph = (const void *)(skb->data + iphdroff); - struct dccp_hdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl * 4; - __be32 oldip, newip; - __be16 *portptr, oldport, newport; - int hdrsize = 8; /* DCCP connection tracking guarantees this much */ - - if (skb->len >= hdroff + sizeof(struct dccp_hdr)) - hdrsize = sizeof(struct dccp_hdr); - - if (!skb_make_writable(skb, hdroff + hdrsize)) - return false; - - iph = (struct iphdr *)(skb->data + iphdroff); - hdr = (struct dccp_hdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - oldip = iph->saddr; - newip = tuple->src.u3.ip; - newport = tuple->src.u.dccp.port; - portptr = &hdr->dccph_sport; - } else { - oldip = iph->daddr; - newip = tuple->dst.u3.ip; - newport = tuple->dst.u.dccp.port; - portptr = &hdr->dccph_dport; - } - - oldport = *portptr; - *portptr = newport; - - if (hdrsize < sizeof(*hdr)) - return true; - - inet_proto_csum_replace4(&hdr->dccph_checksum, skb, oldip, newip, 1); - inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, - 0); - return true; -} - -static const struct nf_nat_protocol nf_nat_protocol_dccp = { - .protonum = IPPROTO_DCCP, - .manip_pkt = dccp_manip_pkt, - .in_range = nf_nat_proto_in_range, - .unique_tuple = dccp_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, -#endif -}; - -static int __init nf_nat_proto_dccp_init(void) -{ - return nf_nat_protocol_register(&nf_nat_protocol_dccp); -} - -static void __exit nf_nat_proto_dccp_fini(void) -{ - nf_nat_protocol_unregister(&nf_nat_protocol_dccp); -} - -module_init(nf_nat_proto_dccp_init); -module_exit(nf_nat_proto_dccp_fini); - -MODULE_AUTHOR("Patrick McHardy "); -MODULE_DESCRIPTION("DCCP NAT protocol helper"); -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index 46ba0b9ab985..ea44f02563b5 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -28,8 +28,7 @@ #include #include -#include -#include +#include #include MODULE_LICENSE("GPL"); @@ -38,8 +37,9 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); /* generate unique tuple ... */ static void -gre_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, +gre_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -62,8 +62,8 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, min = 1; range_size = 0xffff; } else { - min = ntohs(range->min.gre.key); - range_size = ntohs(range->max.gre.key) - min + 1; + min = ntohs(range->min_proto.gre.key); + range_size = ntohs(range->max_proto.gre.key) - min + 1; } pr_debug("min = %u, range_size = %u\n", min, range_size); @@ -80,14 +80,14 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, /* manipulate a GRE packet according to maniptype */ static bool -gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, +gre_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { const struct gre_hdr *greh; struct gre_hdr_pptp *pgreh; - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); - unsigned int hdroff = iphdroff + iph->ihl * 4; /* pgreh includes two optional 32bit fields which are not required * to be there. That's where the magic '8' comes from */ @@ -117,24 +117,24 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, return true; } -static const struct nf_nat_protocol gre = { - .protonum = IPPROTO_GRE, +static const struct nf_nat_l4proto gre = { + .l4proto = IPPROTO_GRE, .manip_pkt = gre_manip_pkt, - .in_range = nf_nat_proto_in_range, + .in_range = nf_nat_l4proto_in_range, .unique_tuple = gre_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; static int __init nf_nat_proto_gre_init(void) { - return nf_nat_protocol_register(&gre); + return nf_nat_l4proto_register(NFPROTO_IPV4, &gre); } static void __exit nf_nat_proto_gre_fini(void) { - nf_nat_protocol_unregister(&gre); + nf_nat_l4proto_unregister(NFPROTO_IPV4, &gre); } module_init(nf_nat_proto_gre_init); diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index b35172851bae..eb303471bcf6 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -15,8 +15,7 @@ #include #include #include -#include -#include +#include static bool icmp_in_range(const struct nf_conntrack_tuple *tuple, @@ -29,8 +28,9 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple, } static void -icmp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, +icmp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -38,13 +38,14 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, unsigned int range_size; unsigned int i; - range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1; + range_size = ntohs(range->max_proto.icmp.id) - + ntohs(range->min_proto.icmp.id) + 1; /* If no range specified... */ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) range_size = 0xFFFF; for (i = 0; ; ++id) { - tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + + tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) + (id % range_size)); if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) return; @@ -54,13 +55,12 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, static bool icmp_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); struct icmphdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) return false; @@ -72,12 +72,12 @@ icmp_manip_pkt(struct sk_buff *skb, return true; } -const struct nf_nat_protocol nf_nat_protocol_icmp = { - .protonum = IPPROTO_ICMP, +const struct nf_nat_l4proto nf_nat_l4proto_icmp = { + .l4proto = IPPROTO_ICMP, .manip_pkt = icmp_manip_pkt, .in_range = icmp_in_range, .unique_tuple = icmp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c deleted file mode 100644 index 3cce9b6c1c29..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_sctp.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2008 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include - -#include - -static u_int16_t nf_sctp_port_rover; - -static void -sctp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &nf_sctp_port_rover); -} - -static bool -sctp_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); - struct sk_buff *frag; - sctp_sctphdr_t *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; - __be32 oldip, newip; - __be32 crc32; - - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) - return false; - - iph = (struct iphdr *)(skb->data + iphdroff); - hdr = (struct sctphdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src ip and src pt */ - oldip = iph->saddr; - newip = tuple->src.u3.ip; - hdr->source = tuple->src.u.sctp.port; - } else { - /* Get rid of dst ip and dst pt */ - oldip = iph->daddr; - newip = tuple->dst.u3.ip; - hdr->dest = tuple->dst.u.sctp.port; - } - - crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff); - skb_walk_frags(skb, frag) - crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag), - crc32); - crc32 = sctp_end_cksum(crc32); - hdr->checksum = crc32; - - return true; -} - -static const struct nf_nat_protocol nf_nat_protocol_sctp = { - .protonum = IPPROTO_SCTP, - .manip_pkt = sctp_manip_pkt, - .in_range = nf_nat_proto_in_range, - .unique_tuple = sctp_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, -#endif -}; - -static int __init nf_nat_proto_sctp_init(void) -{ - return nf_nat_protocol_register(&nf_nat_protocol_sctp); -} - -static void __exit nf_nat_proto_sctp_exit(void) -{ - nf_nat_protocol_unregister(&nf_nat_protocol_sctp); -} - -module_init(nf_nat_proto_sctp_init); -module_exit(nf_nat_proto_sctp_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SCTP NAT protocol helper"); -MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c deleted file mode 100644 index 9fb4b4e72bbf..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ /dev/null @@ -1,91 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -static u_int16_t tcp_port_rover; - -static void -tcp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover); -} - -static bool -tcp_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); - struct tcphdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; - __be32 oldip, newip; - __be16 *portptr, newport, oldport; - int hdrsize = 8; /* TCP connection tracking guarantees this much */ - - /* this could be a inner header returned in icmp packet; in such - cases we cannot update the checksum field since it is outside of - the 8 bytes of transport layer headers we are guaranteed */ - if (skb->len >= hdroff + sizeof(struct tcphdr)) - hdrsize = sizeof(struct tcphdr); - - if (!skb_make_writable(skb, hdroff + hdrsize)) - return false; - - iph = (struct iphdr *)(skb->data + iphdroff); - hdr = (struct tcphdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src ip and src pt */ - oldip = iph->saddr; - newip = tuple->src.u3.ip; - newport = tuple->src.u.tcp.port; - portptr = &hdr->source; - } else { - /* Get rid of dst ip and dst pt */ - oldip = iph->daddr; - newip = tuple->dst.u3.ip; - newport = tuple->dst.u.tcp.port; - portptr = &hdr->dest; - } - - oldport = *portptr; - *portptr = newport; - - if (hdrsize < sizeof(*hdr)) - return true; - - inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); - inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); - return true; -} - -const struct nf_nat_protocol nf_nat_protocol_tcp = { - .protonum = IPPROTO_TCP, - .manip_pkt = tcp_manip_pkt, - .in_range = nf_nat_proto_in_range, - .unique_tuple = tcp_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, -#endif -}; diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c deleted file mode 100644 index 9883336e628f..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ /dev/null @@ -1,82 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -static u_int16_t udp_port_rover; - -static void -udp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover); -} - -static bool -udp_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); - struct udphdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; - __be32 oldip, newip; - __be16 *portptr, newport; - - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) - return false; - - iph = (struct iphdr *)(skb->data + iphdroff); - hdr = (struct udphdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src ip and src pt */ - oldip = iph->saddr; - newip = tuple->src.u3.ip; - newport = tuple->src.u.udp.port; - portptr = &hdr->source; - } else { - /* Get rid of dst ip and dst pt */ - oldip = iph->daddr; - newip = tuple->dst.u3.ip; - newport = tuple->dst.u.udp.port; - portptr = &hdr->dest; - } - if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) { - inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); - inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, - 0); - if (!hdr->check) - hdr->check = CSUM_MANGLED_0; - } - *portptr = newport; - return true; -} - -const struct nf_nat_protocol nf_nat_protocol_udp = { - .protonum = IPPROTO_UDP, - .manip_pkt = udp_manip_pkt, - .in_range = nf_nat_proto_in_range, - .unique_tuple = udp_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, -#endif -}; diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c deleted file mode 100644 index d24d10a7beb2..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_udplite.c +++ /dev/null @@ -1,98 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * (C) 2008 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include - -#include -#include -#include -#include - -static u_int16_t udplite_port_rover; - -static void -udplite_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, - &udplite_port_rover); -} - -static bool -udplite_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); - struct udphdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; - __be32 oldip, newip; - __be16 *portptr, newport; - - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) - return false; - - iph = (struct iphdr *)(skb->data + iphdroff); - hdr = (struct udphdr *)(skb->data + hdroff); - - if (maniptype == NF_NAT_MANIP_SRC) { - /* Get rid of src ip and src pt */ - oldip = iph->saddr; - newip = tuple->src.u3.ip; - newport = tuple->src.u.udp.port; - portptr = &hdr->source; - } else { - /* Get rid of dst ip and dst pt */ - oldip = iph->daddr; - newip = tuple->dst.u3.ip; - newport = tuple->dst.u.udp.port; - portptr = &hdr->dest; - } - - inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); - inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0); - if (!hdr->check) - hdr->check = CSUM_MANGLED_0; - - *portptr = newport; - return true; -} - -static const struct nf_nat_protocol nf_nat_protocol_udplite = { - .protonum = IPPROTO_UDPLITE, - .manip_pkt = udplite_manip_pkt, - .in_range = nf_nat_proto_in_range, - .unique_tuple = udplite_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .nlattr_to_range = nf_nat_proto_nlattr_to_range, -#endif -}; - -static int __init nf_nat_proto_udplite_init(void) -{ - return nf_nat_protocol_register(&nf_nat_protocol_udplite); -} - -static void __exit nf_nat_proto_udplite_fini(void) -{ - nf_nat_protocol_unregister(&nf_nat_protocol_udplite); -} - -module_init(nf_nat_proto_udplite_init); -module_exit(nf_nat_proto_udplite_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("UDP-Lite NAT protocol helper"); -MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c deleted file mode 100644 index e0afe8112b1c..000000000000 --- a/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ /dev/null @@ -1,52 +0,0 @@ -/* The "unknown" protocol. This is what is used for protocols we - * don't understand. It's returned by ip_ct_find_proto(). - */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include - -#include -#include -#include -#include - -static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type manip_type, - const union nf_conntrack_man_proto *min, - const union nf_conntrack_man_proto *max) -{ - return true; -} - -static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_ipv4_range *range, - enum nf_nat_manip_type maniptype, - const struct nf_conn *ct) -{ - /* Sorry: we can't help you; if it's not unique, we can't frob - anything. */ - return; -} - -static bool -unknown_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ - return true; -} - -const struct nf_nat_protocol nf_nat_unknown_protocol = { - .manip_pkt = unknown_manip_pkt, - .in_range = unknown_in_range, - .unique_tuple = unknown_unique_tuple, -}; diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c deleted file mode 100644 index d2a9dc314e0e..000000000000 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ /dev/null @@ -1,214 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* Everything about the rules for NAT. */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ - (1 << NF_INET_POST_ROUTING) | \ - (1 << NF_INET_LOCAL_OUT) | \ - (1 << NF_INET_LOCAL_IN)) - -static const struct xt_table nat_table = { - .name = "nat", - .valid_hooks = NAT_VALID_HOOKS, - .me = THIS_MODULE, - .af = NFPROTO_IPV4, -}; - -/* Source NAT */ -static unsigned int -ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING || - par->hooknum == NF_INET_LOCAL_IN); - - ct = nf_ct_get(skb, &ctinfo); - - /* Connection must be valid and new. */ - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED_REPLY)); - NF_CT_ASSERT(par->out != NULL); - - return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_SRC); -} - -static unsigned int -ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT); - - ct = nf_ct_get(skb, &ctinfo); - - /* Connection must be valid and new. */ - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - - return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_DST); -} - -static int ipt_snat_checkentry(const struct xt_tgchk_param *par) -{ - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - /* Must be a valid range */ - if (mr->rangesize != 1) { - pr_info("SNAT: multiple ranges no longer supported\n"); - return -EINVAL; - } - return 0; -} - -static int ipt_dnat_checkentry(const struct xt_tgchk_param *par) -{ - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - /* Must be a valid range */ - if (mr->rangesize != 1) { - pr_info("DNAT: multiple ranges no longer supported\n"); - return -EINVAL; - } - return 0; -} - -static unsigned int -alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) -{ - /* Force range to this IP; let proto decide mapping for - per-proto parts (hence not NF_NAT_RANGE_PROTO_SPECIFIED). - */ - struct nf_nat_ipv4_range range; - - range.flags = 0; - pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, - HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? - &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : - &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); - - return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); -} - -int nf_nat_rule_find(struct sk_buff *skb, - unsigned int hooknum, - const struct net_device *in, - const struct net_device *out, - struct nf_conn *ct) -{ - struct net *net = nf_ct_net(ct); - int ret; - - ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); - - if (ret == NF_ACCEPT) { - if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) - /* NUL mapping */ - ret = alloc_null_binding(ct, hooknum); - } - return ret; -} - -static struct xt_target ipt_snat_reg __read_mostly = { - .name = "SNAT", - .target = ipt_snat_target, - .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), - .table = "nat", - .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), - .checkentry = ipt_snat_checkentry, - .family = AF_INET, -}; - -static struct xt_target ipt_dnat_reg __read_mostly = { - .name = "DNAT", - .target = ipt_dnat_target, - .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), - .table = "nat", - .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), - .checkentry = ipt_dnat_checkentry, - .family = AF_INET, -}; - -static int __net_init nf_nat_rule_net_init(struct net *net) -{ - struct ipt_replace *repl; - - repl = ipt_alloc_initial_table(&nat_table); - if (repl == NULL) - return -ENOMEM; - net->ipv4.nat_table = ipt_register_table(net, &nat_table, repl); - kfree(repl); - if (IS_ERR(net->ipv4.nat_table)) - return PTR_ERR(net->ipv4.nat_table); - return 0; -} - -static void __net_exit nf_nat_rule_net_exit(struct net *net) -{ - ipt_unregister_table(net, net->ipv4.nat_table); -} - -static struct pernet_operations nf_nat_rule_net_ops = { - .init = nf_nat_rule_net_init, - .exit = nf_nat_rule_net_exit, -}; - -int __init nf_nat_rule_init(void) -{ - int ret; - - ret = register_pernet_subsys(&nf_nat_rule_net_ops); - if (ret != 0) - goto out; - ret = xt_register_target(&ipt_snat_reg); - if (ret != 0) - goto unregister_table; - - ret = xt_register_target(&ipt_dnat_reg); - if (ret != 0) - goto unregister_snat; - - return ret; - - unregister_snat: - xt_unregister_target(&ipt_snat_reg); - unregister_table: - unregister_pernet_subsys(&nf_nat_rule_net_ops); - out: - return ret; -} - -void nf_nat_rule_cleanup(void) -{ - xt_unregister_target(&ipt_dnat_reg); - xt_unregister_target(&ipt_snat_reg); - unregister_pernet_subsys(&nf_nat_rule_net_ops); -} diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index df626af8413c..47a47186a791 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -19,7 +19,6 @@ #include #include -#include #include #include #include @@ -255,15 +254,15 @@ static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off) static void ip_nat_sip_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) { - struct nf_nat_ipv4_range range; + struct nf_nat_range range; /* This must be a fresh one. */ BUG_ON(ct->status & IPS_NAT_DONE_MASK); /* For DST manip, map port here to where it's expected. */ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); - range.min = range.max = exp->saved_proto; - range.min_ip = range.max_ip = exp->saved_ip; + range.min_proto = range.max_proto = exp->saved_proto; + range.min_addr = range.max_addr = exp->saved_addr; nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); /* Change src to where master sends to, but only if the connection @@ -271,8 +270,8 @@ static void ip_nat_sip_expected(struct nf_conn *ct, if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == ct->master->tuplehash[exp->dir].tuple.src.u3.ip) { range.flags = NF_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.dst.u3; nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); } } @@ -307,7 +306,7 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, else port = ntohs(exp->tuple.dst.u.udp.port); - exp->saved_ip = exp->tuple.dst.u3.ip; + exp->saved_addr = exp->tuple.dst.u3; exp->tuple.dst.u3.ip = newip; exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port; exp->dir = !dir; @@ -329,7 +328,7 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, if (port == 0) return NF_DROP; - if (exp->tuple.dst.u3.ip != exp->saved_ip || + if (exp->tuple.dst.u3.ip != exp->saved_addr.ip || exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { buflen = sprintf(buffer, "%pI4:%u", &newip, port); if (!mangle_packet(skb, protoff, dataoff, dptr, datalen, @@ -485,13 +484,13 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int protoff, else rtp_addr->ip = ct->tuplehash[!dir].tuple.dst.u3.ip; - rtp_exp->saved_ip = rtp_exp->tuple.dst.u3.ip; + rtp_exp->saved_addr = rtp_exp->tuple.dst.u3; rtp_exp->tuple.dst.u3.ip = rtp_addr->ip; rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port; rtp_exp->dir = !dir; rtp_exp->expectfn = ip_nat_sip_expected; - rtcp_exp->saved_ip = rtcp_exp->tuple.dst.u3.ip; + rtcp_exp->saved_addr = rtcp_exp->tuple.dst.u3; rtcp_exp->tuple.dst.u3.ip = rtp_addr->ip; rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port; rtcp_exp->dir = !dir; diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c deleted file mode 100644 index 3828a4229822..000000000000 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ /dev/null @@ -1,326 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_XFRM -static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) -{ - struct flowi4 *fl4 = &fl->u.ip4; - const struct nf_conn *ct; - const struct nf_conntrack_tuple *t; - enum ip_conntrack_info ctinfo; - enum ip_conntrack_dir dir; - unsigned long statusbit; - - ct = nf_ct_get(skb, &ctinfo); - if (ct == NULL) - return; - dir = CTINFO2DIR(ctinfo); - t = &ct->tuplehash[dir].tuple; - - if (dir == IP_CT_DIR_ORIGINAL) - statusbit = IPS_DST_NAT; - else - statusbit = IPS_SRC_NAT; - - if (ct->status & statusbit) { - fl4->daddr = t->dst.u3.ip; - if (t->dst.protonum == IPPROTO_TCP || - t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || - t->dst.protonum == IPPROTO_SCTP) - fl4->fl4_dport = t->dst.u.tcp.port; - } - - statusbit ^= IPS_NAT_MASK; - - if (ct->status & statusbit) { - fl4->saddr = t->src.u3.ip; - if (t->dst.protonum == IPPROTO_TCP || - t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || - t->dst.protonum == IPPROTO_DCCP || - t->dst.protonum == IPPROTO_SCTP) - fl4->fl4_sport = t->src.u.tcp.port; - } -} -#endif - -static unsigned int -nf_nat_fn(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - struct nf_conn_nat *nat; - /* maniptype == SRC for postrouting. */ - enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); - - /* We never see fragments: conntrack defrags on pre-routing - and local-out, and nf_nat_out protects post-routing. */ - NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); - - ct = nf_ct_get(skb, &ctinfo); - /* Can't track? It's not due to stress, or conntrack would - have dropped it. Hence it's the user's responsibilty to - packet filter it out, or implement conntrack/NAT for that - protocol. 8) --RR */ - if (!ct) - return NF_ACCEPT; - - /* Don't try to NAT if this packet is not conntracked */ - if (nf_ct_is_untracked(ct)) - return NF_ACCEPT; - - nat = nfct_nat(ct); - if (!nat) { - /* NAT module was loaded late. */ - if (nf_ct_is_confirmed(ct)) - return NF_ACCEPT; - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) { - pr_debug("failed to add NAT extension\n"); - return NF_ACCEPT; - } - } - - switch (ctinfo) { - case IP_CT_RELATED: - case IP_CT_RELATED_REPLY: - if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { - if (!nf_nat_icmp_reply_translation(ct, ctinfo, - hooknum, skb)) - return NF_DROP; - else - return NF_ACCEPT; - } - /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ - case IP_CT_NEW: - - /* Seen it before? This can happen for loopback, retrans, - or local packets.. */ - if (!nf_nat_initialized(ct, maniptype)) { - unsigned int ret; - - ret = nf_nat_rule_find(skb, hooknum, in, out, ct); - if (ret != NF_ACCEPT) - return ret; - } else - pr_debug("Already setup manip %s for ct %p\n", - maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", - ct); - break; - - default: - /* ESTABLISHED */ - NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || - ctinfo == IP_CT_ESTABLISHED_REPLY); - } - - return nf_nat_packet(ct, ctinfo, hooknum, skb); -} - -static unsigned int -nf_nat_in(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - unsigned int ret; - __be32 daddr = ip_hdr(skb)->daddr; - - ret = nf_nat_fn(hooknum, skb, in, out, okfn); - if (ret != NF_DROP && ret != NF_STOLEN && - daddr != ip_hdr(skb)->daddr) - skb_dst_drop(skb); - - return ret; -} - -static unsigned int -nf_nat_out(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ -#ifdef CONFIG_XFRM - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; -#endif - unsigned int ret; - - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - return NF_ACCEPT; - - ret = nf_nat_fn(hooknum, skb, in, out, okfn); -#ifdef CONFIG_XFRM - if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if ((ct->tuplehash[dir].tuple.src.u3.ip != - ct->tuplehash[!dir].tuple.dst.u3.ip) || - (ct->tuplehash[dir].tuple.src.u.all != - ct->tuplehash[!dir].tuple.dst.u.all) - ) - return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP; - } -#endif - return ret; -} - -static unsigned int -nf_nat_local_fn(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - unsigned int ret; - - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) - return NF_ACCEPT; - - ret = nf_nat_fn(hooknum, skb, in, out, okfn); - if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if (ct->tuplehash[dir].tuple.dst.u3.ip != - ct->tuplehash[!dir].tuple.src.u3.ip) { - if (ip_route_me_harder(skb, RTN_UNSPEC)) - ret = NF_DROP; - } -#ifdef CONFIG_XFRM - else if (ct->tuplehash[dir].tuple.dst.u.all != - ct->tuplehash[!dir].tuple.src.u.all) - if (ip_xfrm_me_harder(skb)) - ret = NF_DROP; -#endif - } - return ret; -} - -/* We must be after connection tracking and before packet filtering. */ - -static struct nf_hook_ops nf_nat_ops[] __read_mostly = { - /* Before packet filtering, change destination */ - { - .hook = nf_nat_in, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_NAT_DST, - }, - /* After packet filtering, change source */ - { - .hook = nf_nat_out, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_NAT_SRC, - }, - /* Before packet filtering, change destination */ - { - .hook = nf_nat_local_fn, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_NAT_DST, - }, - /* After packet filtering, change source */ - { - .hook = nf_nat_fn, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_NAT_SRC, - }, -}; - -static int __init nf_nat_standalone_init(void) -{ - int ret = 0; - - need_ipv4_conntrack(); - -#ifdef CONFIG_XFRM - BUG_ON(ip_nat_decode_session != NULL); - RCU_INIT_POINTER(ip_nat_decode_session, nat_decode_session); -#endif - ret = nf_nat_rule_init(); - if (ret < 0) { - pr_err("nf_nat_init: can't setup rules.\n"); - goto cleanup_decode_session; - } - ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); - if (ret < 0) { - pr_err("nf_nat_init: can't register hooks.\n"); - goto cleanup_rule_init; - } - return ret; - - cleanup_rule_init: - nf_nat_rule_cleanup(); - cleanup_decode_session: -#ifdef CONFIG_XFRM - RCU_INIT_POINTER(ip_nat_decode_session, NULL); - synchronize_net(); -#endif - return ret; -} - -static void __exit nf_nat_standalone_fini(void) -{ - nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops)); - nf_nat_rule_cleanup(); -#ifdef CONFIG_XFRM - RCU_INIT_POINTER(ip_nat_decode_session, NULL); - synchronize_net(); -#endif - /* Conntrack caches are unregistered in nf_conntrack_cleanup */ -} - -module_init(nf_nat_standalone_init); -module_exit(nf_nat_standalone_fini); - -MODULE_LICENSE("GPL"); -MODULE_ALIAS("ip_nat"); diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index 9dbb8d284f99..ccabbda71a3e 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -11,7 +11,6 @@ #include #include #include -#include #include MODULE_AUTHOR("Magnus Boden "); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c19b214ffd57..91adddae20a4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -356,6 +356,30 @@ config NETFILTER_NETLINK_QUEUE_CT If this option is enabled, NFQUEUE can include Connection Tracking information together with the packet is the enqueued via NFNETLINK. +config NF_NAT + tristate + +config NF_NAT_NEEDED + bool + depends on NF_NAT + default y + +config NF_NAT_PROTO_DCCP + tristate + depends on NF_NAT && NF_CT_PROTO_DCCP + default NF_NAT && NF_CT_PROTO_DCCP + +config NF_NAT_PROTO_UDPLITE + tristate + depends on NF_NAT && NF_CT_PROTO_UDPLITE + default NF_NAT && NF_CT_PROTO_UDPLITE + +config NF_NAT_PROTO_SCTP + tristate + default NF_NAT && NF_CT_PROTO_SCTP + depends on NF_NAT && NF_CT_PROTO_SCTP + select LIBCRC32C + endif # NF_CONNTRACK # transparent proxy support diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1c5160f2278e..09c9451bc510 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -43,6 +43,17 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o +nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ + nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o + +obj-$(CONFIG_NF_NAT) += nf_nat.o +obj-$(CONFIG_NF_NAT) += xt_nat.o + +# NAT protocols (nf_nat) +obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o +obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o +obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o + # transparent proxy support obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 8f4b0b2b6f80..e61b3ac9591b 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -275,6 +275,11 @@ EXPORT_SYMBOL_GPL(nfq_ct_nat_hook); #endif /* CONFIG_NF_CONNTRACK */ +#ifdef CONFIG_NF_NAT_NEEDED +void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); +EXPORT_SYMBOL(nf_nat_decode_session_hook); +#endif + #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_net_netfilter; EXPORT_SYMBOL(proc_net_netfilter); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index cf4875565d67..f83e79defed9 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -55,6 +55,12 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, const struct nlattr *attr) __read_mostly; EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); +int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff); +EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook); + DEFINE_SPINLOCK(nf_conntrack_lock); EXPORT_SYMBOL_GPL(nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index da4fc37a8578..966f5133a384 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -45,7 +45,7 @@ #include #ifdef CONFIG_NF_NAT_NEEDED #include -#include +#include #include #endif @@ -1096,13 +1096,14 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, const struct nlattr *attr) { typeof(nfnetlink_parse_nat_setup_hook) parse_nat_setup; + int err; parse_nat_setup = rcu_dereference(nfnetlink_parse_nat_setup_hook); if (!parse_nat_setup) { #ifdef CONFIG_MODULES rcu_read_unlock(); nfnl_unlock(); - if (request_module("nf-nat-ipv4") < 0) { + if (request_module("nf-nat") < 0) { nfnl_lock(); rcu_read_lock(); return -EOPNOTSUPP; @@ -1115,7 +1116,26 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, return -EOPNOTSUPP; } - return parse_nat_setup(ct, manip, attr); + err = parse_nat_setup(ct, manip, attr); + if (err == -EAGAIN) { +#ifdef CONFIG_MODULES + rcu_read_unlock(); + spin_unlock_bh(&nf_conntrack_lock); + nfnl_unlock(); + if (request_module("nf-nat-%u", nf_ct_l3num(ct)) < 0) { + nfnl_lock(); + spin_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); + return -EOPNOTSUPP; + } + nfnl_lock(); + spin_lock_bh(&nf_conntrack_lock); + rcu_read_lock(); +#else + err = -EOPNOTSUPP; +#endif + } + return err; } #endif @@ -1979,6 +1999,8 @@ nla_put_failure: return -1; } +static const union nf_inet_addr any_addr; + static int ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) @@ -2005,7 +2027,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, goto nla_put_failure; #ifdef CONFIG_NF_NAT_NEEDED - if (exp->saved_ip || exp->saved_proto.all) { + if (!nf_inet_addr_cmp(&exp->saved_addr, &any_addr) || + exp->saved_proto.all) { nest_parms = nla_nest_start(skb, CTA_EXPECT_NAT | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; @@ -2014,7 +2037,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, goto nla_put_failure; nat_tuple.src.l3num = nf_ct_l3num(master); - nat_tuple.src.u3.ip = exp->saved_ip; + nat_tuple.src.u3 = exp->saved_addr; nat_tuple.dst.protonum = nf_ct_protonum(master); nat_tuple.src.u = exp->saved_proto; @@ -2410,7 +2433,7 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, if (err < 0) return err; - exp->saved_ip = nat_tuple.src.u3.ip; + exp->saved_addr = nat_tuple.src.u3; exp->saved_proto = nat_tuple.src.u; exp->dir = ntohl(nla_get_be32(tb[CTA_EXPECT_NAT_DIR])); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index a5ac11ebef33..9c2cc716f4a5 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -505,10 +505,10 @@ static inline s16 nat_offset(const struct nf_conn *ct, return get_offset != NULL ? get_offset(ct, dir, seq) : 0; } -#define NAT_OFFSET(pf, ct, dir, seq) \ - (pf == NFPROTO_IPV4 ? nat_offset(ct, dir, seq) : 0) +#define NAT_OFFSET(ct, dir, seq) \ + (nat_offset(ct, dir, seq)) #else -#define NAT_OFFSET(pf, ct, dir, seq) 0 +#define NAT_OFFSET(ct, dir, seq) 0 #endif static bool tcp_in_window(const struct nf_conn *ct, @@ -541,7 +541,7 @@ static bool tcp_in_window(const struct nf_conn *ct, tcp_sack(skb, dataoff, tcph, &sack); /* Take into account NAT sequence number mangling */ - receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1); + receiver_offset = NAT_OFFSET(ct, !dir, ack - 1); ack -= receiver_offset; sack -= receiver_offset; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 590f0abaab8c..d5174902db37 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -946,11 +946,11 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, break; #ifdef CONFIG_NF_NAT_NEEDED if (exp->tuple.src.l3num == AF_INET && !direct_rtp && - (exp->saved_ip != exp->tuple.dst.u3.ip || + (exp->saved_addr.ip != exp->tuple.dst.u3.ip || exp->saved_proto.udp.port != exp->tuple.dst.u.udp.port) && ct->status & IPS_NAT_MASK) { - daddr->ip = exp->saved_ip; - tuple.dst.u3.ip = exp->saved_ip; + daddr->ip = exp->saved_addr.ip; + tuple.dst.u3.ip = exp->saved_addr.ip; tuple.dst.u.udp.port = exp->saved_proto.udp.port; direct_rtp = 1; } else diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c new file mode 100644 index 000000000000..c577b753fb9a --- /dev/null +++ b/net/netfilter/nf_nat_core.c @@ -0,0 +1,854 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * (C) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_SPINLOCK(nf_nat_lock); + +static DEFINE_MUTEX(nf_nat_proto_mutex); +static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO] + __read_mostly; +static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO] + __read_mostly; + + +inline const struct nf_nat_l3proto * +__nf_nat_l3proto_find(u8 family) +{ + return rcu_dereference(nf_nat_l3protos[family]); +} + +inline const struct nf_nat_l4proto * +__nf_nat_l4proto_find(u8 family, u8 protonum) +{ + return rcu_dereference(nf_nat_l4protos[family][protonum]); +} +EXPORT_SYMBOL_GPL(__nf_nat_l4proto_find); + +#ifdef CONFIG_XFRM +static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + const struct nf_nat_l3proto *l3proto; + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + enum ip_conntrack_dir dir; + unsigned long statusbit; + u8 family; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) + return; + + family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; + rcu_read_lock(); + l3proto = __nf_nat_l3proto_find(family); + if (l3proto == NULL) + goto out; + + dir = CTINFO2DIR(ctinfo); + if (dir == IP_CT_DIR_ORIGINAL) + statusbit = IPS_DST_NAT; + else + statusbit = IPS_SRC_NAT; + + l3proto->decode_session(skb, ct, dir, statusbit, fl); +out: + rcu_read_unlock(); +} + +int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family) +{ + struct flowi fl; + unsigned int hh_len; + struct dst_entry *dst; + + if (xfrm_decode_session(skb, &fl, family) < 0) + return -1; + + dst = skb_dst(skb); + if (dst->xfrm) + dst = ((struct xfrm_dst *)dst)->route; + dst_hold(dst); + + dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); + if (IS_ERR(dst)) + return -1; + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + /* Change in oif may mean change in hh_len. */ + hh_len = skb_dst(skb)->dev->hard_header_len; + if (skb_headroom(skb) < hh_len && + pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) + return -1; + return 0; +} +EXPORT_SYMBOL(nf_xfrm_me_harder); +#endif /* CONFIG_XFRM */ + +/* We keep an extra hash for each conntrack, for fast searching. */ +static inline unsigned int +hash_by_src(const struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) +{ + unsigned int hash; + + /* Original src, to ensure we map it consistently if poss. */ + hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32), + tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd); + return ((u64)hash * net->ct.nat_htable_size) >> 32; +} + +/* Is this tuple already taken? (not by us) */ +int +nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack) +{ + /* Conntrack tracking doesn't keep track of outgoing tuples; only + * incoming ones. NAT means they don't have a fixed mapping, + * so we invert the tuple and look for the incoming reply. + * + * We could keep a separate hash if this proves too slow. + */ + struct nf_conntrack_tuple reply; + + nf_ct_invert_tuplepr(&reply, tuple); + return nf_conntrack_tuple_taken(&reply, ignored_conntrack); +} +EXPORT_SYMBOL(nf_nat_used_tuple); + +/* If we source map this tuple so reply looks like reply_tuple, will + * that meet the constraints of range. + */ +static int in_range(const struct nf_nat_l3proto *l3proto, + const struct nf_nat_l4proto *l4proto, + const struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range) +{ + /* If we are supposed to map IPs, then we must be in the + * range specified, otherwise let this drag us onto a new src IP. + */ + if (range->flags & NF_NAT_RANGE_MAP_IPS && + !l3proto->in_range(tuple, range)) + return 0; + + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) || + l4proto->in_range(tuple, NF_NAT_MANIP_SRC, + &range->min_proto, &range->max_proto)) + return 1; + + return 0; +} + +static inline int +same_src(const struct nf_conn *ct, + const struct nf_conntrack_tuple *tuple) +{ + const struct nf_conntrack_tuple *t; + + t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + return (t->dst.protonum == tuple->dst.protonum && + nf_inet_addr_cmp(&t->src.u3, &tuple->src.u3) && + t->src.u.all == tuple->src.u.all); +} + +/* Only called for SRC manip */ +static int +find_appropriate_src(struct net *net, u16 zone, + const struct nf_nat_l3proto *l3proto, + const struct nf_nat_l4proto *l4proto, + const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_tuple *result, + const struct nf_nat_range *range) +{ + unsigned int h = hash_by_src(net, zone, tuple); + const struct nf_conn_nat *nat; + const struct nf_conn *ct; + const struct hlist_node *n; + + hlist_for_each_entry_rcu(nat, n, &net->ct.nat_bysource[h], bysource) { + ct = nat->ct; + if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { + /* Copy source part from reply tuple. */ + nf_ct_invert_tuplepr(result, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + result->dst = tuple->dst; + + if (in_range(l3proto, l4proto, result, range)) { + rcu_read_unlock(); + return 1; + } + } + } + return 0; +} + +/* For [FUTURE] fragmentation handling, we want the least-used + * src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus + * if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports + * 1-65535, we don't do pro-rata allocation based on ports; we choose + * the ip with the lowest src-ip/dst-ip/proto usage. + */ +static void +find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + const struct nf_conn *ct, + enum nf_nat_manip_type maniptype) +{ + union nf_inet_addr *var_ipp; + unsigned int i, max; + /* Host order */ + u32 minip, maxip, j, dist; + bool full_range; + + /* No IP mapping? Do nothing. */ + if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) + return; + + if (maniptype == NF_NAT_MANIP_SRC) + var_ipp = &tuple->src.u3; + else + var_ipp = &tuple->dst.u3; + + /* Fast path: only one choice. */ + if (nf_inet_addr_cmp(&range->min_addr, &range->max_addr)) { + *var_ipp = range->min_addr; + return; + } + + if (nf_ct_l3num(ct) == NFPROTO_IPV4) + max = sizeof(var_ipp->ip) / sizeof(u32) - 1; + else + max = sizeof(var_ipp->ip6) / sizeof(u32) - 1; + + /* Hashing source and destination IPs gives a fairly even + * spread in practice (if there are a small number of IPs + * involved, there usually aren't that many connections + * anyway). The consistency means that servers see the same + * client coming from the same IP (some Internet Banking sites + * like this), even across reboots. + */ + j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3), + range->flags & NF_NAT_RANGE_PERSISTENT ? + 0 : (__force u32)tuple->dst.u3.all[max] ^ zone); + + full_range = false; + for (i = 0; i <= max; i++) { + /* If first bytes of the address are at the maximum, use the + * distance. Otherwise use the full range. + */ + if (!full_range) { + minip = ntohl((__force __be32)range->min_addr.all[i]); + maxip = ntohl((__force __be32)range->max_addr.all[i]); + dist = maxip - minip + 1; + } else { + minip = 0; + dist = ~0; + } + + var_ipp->all[i] = (__force __u32) + htonl(minip + (((u64)j * dist) >> 32)); + if (var_ipp->all[i] != range->max_addr.all[i]) + full_range = true; + + if (!(range->flags & NF_NAT_RANGE_PERSISTENT)) + j ^= (__force u32)tuple->dst.u3.all[i]; + } +} + +/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING, + * we change the source to map into the range. For NF_INET_PRE_ROUTING + * and NF_INET_LOCAL_OUT, we change the destination to map into the + * range. It might not be possible to get a unique tuple, but we try. + * At worst (or if we race), we will end up with a final duplicate in + * __ip_conntrack_confirm and drop the packet. */ +static void +get_unique_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig_tuple, + const struct nf_nat_range *range, + struct nf_conn *ct, + enum nf_nat_manip_type maniptype) +{ + const struct nf_nat_l3proto *l3proto; + const struct nf_nat_l4proto *l4proto; + struct net *net = nf_ct_net(ct); + u16 zone = nf_ct_zone(ct); + + rcu_read_lock(); + l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num); + l4proto = __nf_nat_l4proto_find(orig_tuple->src.l3num, + orig_tuple->dst.protonum); + + /* 1) If this srcip/proto/src-proto-part is currently mapped, + * and that same mapping gives a unique tuple within the given + * range, use that. + * + * This is only required for source (ie. NAT/masq) mappings. + * So far, we don't do local source mappings, so multiple + * manips not an issue. + */ + if (maniptype == NF_NAT_MANIP_SRC && + !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { + /* try the original tuple first */ + if (in_range(l3proto, l4proto, orig_tuple, range)) { + if (!nf_nat_used_tuple(orig_tuple, ct)) { + *tuple = *orig_tuple; + goto out; + } + } else if (find_appropriate_src(net, zone, l3proto, l4proto, + orig_tuple, tuple, range)) { + pr_debug("get_unique_tuple: Found current src map\n"); + if (!nf_nat_used_tuple(tuple, ct)) + goto out; + } + } + + /* 2) Select the least-used IP/proto combination in the given range */ + *tuple = *orig_tuple; + find_best_ips_proto(zone, tuple, range, ct, maniptype); + + /* 3) The per-protocol part of the manip is made to map into + * the range to make a unique tuple. + */ + + /* Only bother mapping if it's not already in range and unique */ + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { + if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { + if (l4proto->in_range(tuple, maniptype, + &range->min_proto, + &range->max_proto) && + (range->min_proto.all == range->max_proto.all || + !nf_nat_used_tuple(tuple, ct))) + goto out; + } else if (!nf_nat_used_tuple(tuple, ct)) { + goto out; + } + } + + /* Last change: get protocol to try to obtain unique tuple. */ + l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct); +out: + rcu_read_unlock(); +} + +unsigned int +nf_nat_setup_info(struct nf_conn *ct, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype) +{ + struct net *net = nf_ct_net(ct); + struct nf_conntrack_tuple curr_tuple, new_tuple; + struct nf_conn_nat *nat; + + /* nat helper or nfctnetlink also setup binding */ + nat = nfct_nat(ct); + if (!nat) { + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) { + pr_debug("failed to add NAT extension\n"); + return NF_ACCEPT; + } + } + + NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || + maniptype == NF_NAT_MANIP_DST); + BUG_ON(nf_nat_initialized(ct, maniptype)); + + /* What we've got will look like inverse of reply. Normally + * this is what is in the conntrack, except for prior + * manipulations (future optimization: if num_manips == 0, + * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) + */ + nf_ct_invert_tuplepr(&curr_tuple, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + + get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype); + + if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) { + struct nf_conntrack_tuple reply; + + /* Alter conntrack table so will recognize replies. */ + nf_ct_invert_tuplepr(&reply, &new_tuple); + nf_conntrack_alter_reply(ct, &reply); + + /* Non-atomic: we own this at the moment. */ + if (maniptype == NF_NAT_MANIP_SRC) + ct->status |= IPS_SRC_NAT; + else + ct->status |= IPS_DST_NAT; + } + + if (maniptype == NF_NAT_MANIP_SRC) { + unsigned int srchash; + + srchash = hash_by_src(net, nf_ct_zone(ct), + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + spin_lock_bh(&nf_nat_lock); + /* nf_conntrack_alter_reply might re-allocate extension aera */ + nat = nfct_nat(ct); + nat->ct = ct; + hlist_add_head_rcu(&nat->bysource, + &net->ct.nat_bysource[srchash]); + spin_unlock_bh(&nf_nat_lock); + } + + /* It's done. */ + if (maniptype == NF_NAT_MANIP_DST) + ct->status |= IPS_DST_NAT_DONE; + else + ct->status |= IPS_SRC_NAT_DONE; + + return NF_ACCEPT; +} +EXPORT_SYMBOL(nf_nat_setup_info); + +/* Do packet manipulations according to nf_nat_setup_info. */ +unsigned int nf_nat_packet(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff *skb) +{ + const struct nf_nat_l3proto *l3proto; + const struct nf_nat_l4proto *l4proto; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + unsigned long statusbit; + enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); + + if (mtype == NF_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; + + /* Invert if this is reply dir. */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; + + /* Non-atomic: these bits don't change. */ + if (ct->status & statusbit) { + struct nf_conntrack_tuple target; + + /* We are aiming to look like inverse of other direction. */ + nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + + l3proto = __nf_nat_l3proto_find(target.src.l3num); + l4proto = __nf_nat_l4proto_find(target.src.l3num, + target.dst.protonum); + if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype)) + return NF_DROP; + } + return NF_ACCEPT; +} +EXPORT_SYMBOL_GPL(nf_nat_packet); + +struct nf_nat_proto_clean { + u8 l3proto; + u8 l4proto; + bool hash; +}; + +/* Clear NAT section of all conntracks, in case we're loaded again. */ +static int nf_nat_proto_clean(struct nf_conn *i, void *data) +{ + const struct nf_nat_proto_clean *clean = data; + struct nf_conn_nat *nat = nfct_nat(i); + + if (!nat) + return 0; + if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) || + (clean->l4proto && nf_ct_protonum(i) != clean->l4proto)) + return 0; + + if (clean->hash) { + spin_lock_bh(&nf_nat_lock); + hlist_del_rcu(&nat->bysource); + spin_unlock_bh(&nf_nat_lock); + } else { + memset(nat, 0, sizeof(*nat)); + i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | + IPS_SEQ_ADJUST); + } + return 0; +} + +static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) +{ + struct nf_nat_proto_clean clean = { + .l3proto = l3proto, + .l4proto = l4proto, + }; + struct net *net; + + rtnl_lock(); + /* Step 1 - remove from bysource hash */ + clean.hash = true; + for_each_net(net) + nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + synchronize_rcu(); + + /* Step 2 - clean NAT section */ + clean.hash = false; + for_each_net(net) + nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + rtnl_unlock(); +} + +static void nf_nat_l3proto_clean(u8 l3proto) +{ + struct nf_nat_proto_clean clean = { + .l3proto = l3proto, + }; + struct net *net; + + rtnl_lock(); + /* Step 1 - remove from bysource hash */ + clean.hash = true; + for_each_net(net) + nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + synchronize_rcu(); + + /* Step 2 - clean NAT section */ + clean.hash = false; + for_each_net(net) + nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); + rtnl_unlock(); +} + +/* Protocol registration. */ +int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto) +{ + const struct nf_nat_l4proto **l4protos; + unsigned int i; + int ret = 0; + + mutex_lock(&nf_nat_proto_mutex); + if (nf_nat_l4protos[l3proto] == NULL) { + l4protos = kmalloc(IPPROTO_MAX * sizeof(struct nf_nat_l4proto *), + GFP_KERNEL); + if (l4protos == NULL) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < IPPROTO_MAX; i++) + RCU_INIT_POINTER(l4protos[i], &nf_nat_l4proto_unknown); + + /* Before making proto_array visible to lockless readers, + * we must make sure its content is committed to memory. + */ + smp_wmb(); + + nf_nat_l4protos[l3proto] = l4protos; + } + + if (rcu_dereference_protected( + nf_nat_l4protos[l3proto][l4proto->l4proto], + lockdep_is_held(&nf_nat_proto_mutex) + ) != &nf_nat_l4proto_unknown) { + ret = -EBUSY; + goto out; + } + RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto); + out: + mutex_unlock(&nf_nat_proto_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(nf_nat_l4proto_register); + +/* No one stores the protocol anywhere; simply delete it. */ +void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto) +{ + mutex_lock(&nf_nat_proto_mutex); + RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], + &nf_nat_l4proto_unknown); + mutex_unlock(&nf_nat_proto_mutex); + synchronize_rcu(); + + nf_nat_l4proto_clean(l3proto, l4proto->l4proto); +} +EXPORT_SYMBOL_GPL(nf_nat_l4proto_unregister); + +int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto) +{ + int err; + + err = nf_ct_l3proto_try_module_get(l3proto->l3proto); + if (err < 0) + return err; + + mutex_lock(&nf_nat_proto_mutex); + RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP], + &nf_nat_l4proto_tcp); + RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP], + &nf_nat_l4proto_udp); + mutex_unlock(&nf_nat_proto_mutex); + + RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto); + return 0; +} +EXPORT_SYMBOL_GPL(nf_nat_l3proto_register); + +void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *l3proto) +{ + mutex_lock(&nf_nat_proto_mutex); + RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], NULL); + mutex_unlock(&nf_nat_proto_mutex); + synchronize_rcu(); + + nf_nat_l3proto_clean(l3proto->l3proto); + nf_ct_l3proto_module_put(l3proto->l3proto); +} +EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister); + +/* No one using conntrack by the time this called. */ +static void nf_nat_cleanup_conntrack(struct nf_conn *ct) +{ + struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); + + if (nat == NULL || nat->ct == NULL) + return; + + NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE); + + spin_lock_bh(&nf_nat_lock); + hlist_del_rcu(&nat->bysource); + spin_unlock_bh(&nf_nat_lock); +} + +static void nf_nat_move_storage(void *new, void *old) +{ + struct nf_conn_nat *new_nat = new; + struct nf_conn_nat *old_nat = old; + struct nf_conn *ct = old_nat->ct; + + if (!ct || !(ct->status & IPS_SRC_NAT_DONE)) + return; + + spin_lock_bh(&nf_nat_lock); + hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); + spin_unlock_bh(&nf_nat_lock); +} + +static struct nf_ct_ext_type nat_extend __read_mostly = { + .len = sizeof(struct nf_conn_nat), + .align = __alignof__(struct nf_conn_nat), + .destroy = nf_nat_cleanup_conntrack, + .move = nf_nat_move_storage, + .id = NF_CT_EXT_NAT, + .flags = NF_CT_EXT_F_PREALLOC, +}; + +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include +#include + +static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { + [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, + [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, +}; + +static int nfnetlink_parse_nat_proto(struct nlattr *attr, + const struct nf_conn *ct, + struct nf_nat_range *range) +{ + struct nlattr *tb[CTA_PROTONAT_MAX+1]; + const struct nf_nat_l4proto *l4proto; + int err; + + err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); + if (err < 0) + return err; + + l4proto = __nf_nat_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); + if (l4proto->nlattr_to_range) + err = l4proto->nlattr_to_range(tb, range); + + return err; +} + +static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { + [CTA_NAT_V4_MINIP] = { .type = NLA_U32 }, + [CTA_NAT_V4_MAXIP] = { .type = NLA_U32 }, + [CTA_NAT_PROTO] = { .type = NLA_NESTED }, +}; + +static int +nfnetlink_parse_nat(const struct nlattr *nat, + const struct nf_conn *ct, struct nf_nat_range *range) +{ + const struct nf_nat_l3proto *l3proto; + struct nlattr *tb[CTA_NAT_MAX+1]; + int err; + + memset(range, 0, sizeof(*range)); + + err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy); + if (err < 0) + return err; + + rcu_read_lock(); + l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); + if (l3proto == NULL) { + err = -EAGAIN; + goto out; + } + err = l3proto->nlattr_to_range(tb, range); + if (err < 0) + goto out; + + if (!tb[CTA_NAT_PROTO]) + goto out; + + err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); +out: + rcu_read_unlock(); + return err; +} + +static int +nfnetlink_parse_nat_setup(struct nf_conn *ct, + enum nf_nat_manip_type manip, + const struct nlattr *attr) +{ + struct nf_nat_range range; + int err; + + err = nfnetlink_parse_nat(attr, ct, &range); + if (err < 0) + return err; + if (nf_nat_initialized(ct, manip)) + return -EEXIST; + + return nf_nat_setup_info(ct, &range, manip); +} +#else +static int +nfnetlink_parse_nat_setup(struct nf_conn *ct, + enum nf_nat_manip_type manip, + const struct nlattr *attr) +{ + return -EOPNOTSUPP; +} +#endif + +static int __net_init nf_nat_net_init(struct net *net) +{ + /* Leave them the same for the moment. */ + net->ct.nat_htable_size = net->ct.htable_size; + net->ct.nat_bysource = nf_ct_alloc_hashtable(&net->ct.nat_htable_size, 0); + if (!net->ct.nat_bysource) + return -ENOMEM; + return 0; +} + +static void __net_exit nf_nat_net_exit(struct net *net) +{ + struct nf_nat_proto_clean clean = {}; + + nf_ct_iterate_cleanup(net, &nf_nat_proto_clean, &clean); + synchronize_rcu(); + nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); +} + +static struct pernet_operations nf_nat_net_ops = { + .init = nf_nat_net_init, + .exit = nf_nat_net_exit, +}; + +static struct nf_ct_helper_expectfn follow_master_nat = { + .name = "nat-follow-master", + .expectfn = nf_nat_follow_master, +}; + +static struct nfq_ct_nat_hook nfq_ct_nat = { + .seq_adjust = nf_nat_tcp_seq_adjust, +}; + +static int __init nf_nat_init(void) +{ + int ret; + + ret = nf_ct_extend_register(&nat_extend); + if (ret < 0) { + printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); + return ret; + } + + ret = register_pernet_subsys(&nf_nat_net_ops); + if (ret < 0) + goto cleanup_extend; + + nf_ct_helper_expectfn_register(&follow_master_nat); + + /* Initialize fake conntrack so that NAT will skip it */ + nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); + + BUG_ON(nf_nat_seq_adjust_hook != NULL); + RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); + BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); + RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, + nfnetlink_parse_nat_setup); + BUG_ON(nf_ct_nat_offset != NULL); + RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset); + RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat); +#ifdef CONFIG_XFRM + BUG_ON(nf_nat_decode_session_hook != NULL); + RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session); +#endif + return 0; + + cleanup_extend: + nf_ct_extend_unregister(&nat_extend); + return ret; +} + +static void __exit nf_nat_cleanup(void) +{ + unsigned int i; + + unregister_pernet_subsys(&nf_nat_net_ops); + nf_ct_extend_unregister(&nat_extend); + nf_ct_helper_expectfn_unregister(&follow_master_nat); + RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL); + RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); + RCU_INIT_POINTER(nf_ct_nat_offset, NULL); + RCU_INIT_POINTER(nfq_ct_nat_hook, NULL); +#ifdef CONFIG_XFRM + RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL); +#endif + for (i = 0; i < NFPROTO_NUMPROTO; i++) + kfree(nf_nat_l4protos[i]); + synchronize_net(); +} + +MODULE_LICENSE("GPL"); + +module_init(nf_nat_init); +module_exit(nf_nat_cleanup); diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c new file mode 100644 index 000000000000..23c2b38676a6 --- /dev/null +++ b/net/netfilter/nf_nat_helper.c @@ -0,0 +1,435 @@ +/* nf_nat_helper.c - generic support functions for NAT helpers + * + * (C) 2000-2002 Harald Welte + * (C) 2003-2006 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DUMP_OFFSET(x) \ + pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \ + x->offset_before, x->offset_after, x->correction_pos); + +static DEFINE_SPINLOCK(nf_nat_seqofs_lock); + +/* Setup TCP sequence correction given this change at this sequence */ +static inline void +adjust_tcp_sequence(u32 seq, + int sizediff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + struct nf_conn_nat *nat = nfct_nat(ct); + struct nf_nat_seq *this_way = &nat->seq[dir]; + + pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", + seq, sizediff); + + pr_debug("adjust_tcp_sequence: Seq_offset before: "); + DUMP_OFFSET(this_way); + + spin_lock_bh(&nf_nat_seqofs_lock); + + /* SYN adjust. If it's uninitialized, or this is after last + * correction, record it: we don't handle more than one + * adjustment in the window, but do deal with common case of a + * retransmit */ + if (this_way->offset_before == this_way->offset_after || + before(this_way->correction_pos, seq)) { + this_way->correction_pos = seq; + this_way->offset_before = this_way->offset_after; + this_way->offset_after += sizediff; + } + spin_unlock_bh(&nf_nat_seqofs_lock); + + pr_debug("adjust_tcp_sequence: Seq_offset after: "); + DUMP_OFFSET(this_way); +} + +/* Get the offset value, for conntrack */ +s16 nf_nat_get_offset(const struct nf_conn *ct, + enum ip_conntrack_dir dir, + u32 seq) +{ + struct nf_conn_nat *nat = nfct_nat(ct); + struct nf_nat_seq *this_way; + s16 offset; + + if (!nat) + return 0; + + this_way = &nat->seq[dir]; + spin_lock_bh(&nf_nat_seqofs_lock); + offset = after(seq, this_way->correction_pos) + ? this_way->offset_after : this_way->offset_before; + spin_unlock_bh(&nf_nat_seqofs_lock); + + return offset; +} + +/* Frobs data inside this packet, which is linear. */ +static void mangle_contents(struct sk_buff *skb, + unsigned int dataoff, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len) +{ + unsigned char *data; + + BUG_ON(skb_is_nonlinear(skb)); + data = skb_network_header(skb) + dataoff; + + /* move post-replacement */ + memmove(data + match_offset + rep_len, + data + match_offset + match_len, + skb->tail - (skb->network_header + dataoff + + match_offset + match_len)); + + /* insert data from buffer */ + memcpy(data + match_offset, rep_buffer, rep_len); + + /* update skb info */ + if (rep_len > match_len) { + pr_debug("nf_nat_mangle_packet: Extending packet by " + "%u from %u bytes\n", rep_len - match_len, skb->len); + skb_put(skb, rep_len - match_len); + } else { + pr_debug("nf_nat_mangle_packet: Shrinking packet from " + "%u from %u bytes\n", match_len - rep_len, skb->len); + __skb_trim(skb, skb->len + rep_len - match_len); + } + + if (nf_ct_l3num((struct nf_conn *)skb->nfct) == NFPROTO_IPV4) { + /* fix IP hdr checksum information */ + ip_hdr(skb)->tot_len = htons(skb->len); + ip_send_check(ip_hdr(skb)); + } else + ipv6_hdr(skb)->payload_len = + htons(skb->len - sizeof(struct ipv6hdr)); +} + +/* Unusual, but possible case. */ +static int enlarge_skb(struct sk_buff *skb, unsigned int extra) +{ + if (skb->len + extra > 65535) + return 0; + + if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC)) + return 0; + + return 1; +} + +void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + __be32 seq, s16 off) +{ + if (!off) + return; + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); + adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo); + nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); +} +EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); + +void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + u32 ctinfo, int off) +{ + const struct tcphdr *th; + + if (nf_ct_protonum(ct) != IPPROTO_TCP) + return; + + th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb)); + nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); +} +EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust); + +/* Generic function for mangling variable-length address changes inside + * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX + * command in FTP). + * + * Takes care about all the nasty sequence number changes, checksumming, + * skb enlargement, ... + * + * */ +int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len, bool adjust) +{ + const struct nf_nat_l3proto *l3proto; + struct tcphdr *tcph; + int oldlen, datalen; + + if (!skb_make_writable(skb, skb->len)) + return 0; + + if (rep_len > match_len && + rep_len - match_len > skb_tailroom(skb) && + !enlarge_skb(skb, rep_len - match_len)) + return 0; + + SKB_LINEAR_ASSERT(skb); + + tcph = (void *)skb->data + protoff; + + oldlen = skb->len - protoff; + mangle_contents(skb, protoff + tcph->doff*4, + match_offset, match_len, rep_buffer, rep_len); + + datalen = skb->len - protoff; + + l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); + l3proto->csum_recalc(skb, IPPROTO_TCP, tcph, &tcph->check, + datalen, oldlen); + + if (adjust && rep_len != match_len) + nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq, + (int)rep_len - (int)match_len); + + return 1; +} +EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet); + +/* Generic function for mangling variable-length address changes inside + * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX + * command in the Amanda protocol) + * + * Takes care about all the nasty sequence number changes, checksumming, + * skb enlargement, ... + * + * XXX - This function could be merged with nf_nat_mangle_tcp_packet which + * should be fairly easy to do. + */ +int +nf_nat_mangle_udp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len) +{ + const struct nf_nat_l3proto *l3proto; + struct udphdr *udph; + int datalen, oldlen; + + if (!skb_make_writable(skb, skb->len)) + return 0; + + if (rep_len > match_len && + rep_len - match_len > skb_tailroom(skb) && + !enlarge_skb(skb, rep_len - match_len)) + return 0; + + udph = (void *)skb->data + protoff; + + oldlen = skb->len - protoff; + mangle_contents(skb, protoff + sizeof(*udph), + match_offset, match_len, rep_buffer, rep_len); + + /* update the length of the UDP packet */ + datalen = skb->len - protoff; + udph->len = htons(datalen); + + /* fix udp checksum if udp checksum was previously calculated */ + if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) + return 1; + + l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); + l3proto->csum_recalc(skb, IPPROTO_UDP, udph, &udph->check, + datalen, oldlen); + + return 1; +} +EXPORT_SYMBOL(nf_nat_mangle_udp_packet); + +/* Adjust one found SACK option including checksum correction */ +static void +sack_adjust(struct sk_buff *skb, + struct tcphdr *tcph, + unsigned int sackoff, + unsigned int sackend, + struct nf_nat_seq *natseq) +{ + while (sackoff < sackend) { + struct tcp_sack_block_wire *sack; + __be32 new_start_seq, new_end_seq; + + sack = (void *)skb->data + sackoff; + if (after(ntohl(sack->start_seq) - natseq->offset_before, + natseq->correction_pos)) + new_start_seq = htonl(ntohl(sack->start_seq) + - natseq->offset_after); + else + new_start_seq = htonl(ntohl(sack->start_seq) + - natseq->offset_before); + + if (after(ntohl(sack->end_seq) - natseq->offset_before, + natseq->correction_pos)) + new_end_seq = htonl(ntohl(sack->end_seq) + - natseq->offset_after); + else + new_end_seq = htonl(ntohl(sack->end_seq) + - natseq->offset_before); + + pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", + ntohl(sack->start_seq), new_start_seq, + ntohl(sack->end_seq), new_end_seq); + + inet_proto_csum_replace4(&tcph->check, skb, + sack->start_seq, new_start_seq, 0); + inet_proto_csum_replace4(&tcph->check, skb, + sack->end_seq, new_end_seq, 0); + sack->start_seq = new_start_seq; + sack->end_seq = new_end_seq; + sackoff += sizeof(*sack); + } +} + +/* TCP SACK sequence number adjustment */ +static inline unsigned int +nf_nat_sack_adjust(struct sk_buff *skb, + unsigned int protoff, + struct tcphdr *tcph, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + unsigned int dir, optoff, optend; + struct nf_conn_nat *nat = nfct_nat(ct); + + optoff = protoff + sizeof(struct tcphdr); + optend = protoff + tcph->doff * 4; + + if (!skb_make_writable(skb, optend)) + return 0; + + dir = CTINFO2DIR(ctinfo); + + while (optoff < optend) { + /* Usually: option, length. */ + unsigned char *op = skb->data + optoff; + + switch (op[0]) { + case TCPOPT_EOL: + return 1; + case TCPOPT_NOP: + optoff++; + continue; + default: + /* no partial options */ + if (optoff + 1 == optend || + optoff + op[1] > optend || + op[1] < 2) + return 0; + if (op[0] == TCPOPT_SACK && + op[1] >= 2+TCPOLEN_SACK_PERBLOCK && + ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) + sack_adjust(skb, tcph, optoff+2, + optoff+op[1], &nat->seq[!dir]); + optoff += op[1]; + } + } + return 1; +} + +/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ +int +nf_nat_seq_adjust(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff) +{ + struct tcphdr *tcph; + int dir; + __be32 newseq, newack; + s16 seqoff, ackoff; + struct nf_conn_nat *nat = nfct_nat(ct); + struct nf_nat_seq *this_way, *other_way; + + dir = CTINFO2DIR(ctinfo); + + this_way = &nat->seq[dir]; + other_way = &nat->seq[!dir]; + + if (!skb_make_writable(skb, protoff + sizeof(*tcph))) + return 0; + + tcph = (void *)skb->data + protoff; + if (after(ntohl(tcph->seq), this_way->correction_pos)) + seqoff = this_way->offset_after; + else + seqoff = this_way->offset_before; + + if (after(ntohl(tcph->ack_seq) - other_way->offset_before, + other_way->correction_pos)) + ackoff = other_way->offset_after; + else + ackoff = other_way->offset_before; + + newseq = htonl(ntohl(tcph->seq) + seqoff); + newack = htonl(ntohl(tcph->ack_seq) - ackoff); + + inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); + inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); + + pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", + ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), + ntohl(newack)); + + tcph->seq = newseq; + tcph->ack_seq = newack; + + return nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo); +} + +/* Setup NAT on this expected conntrack so it follows master. */ +/* If we fail to get a free NAT slot, we'll get dropped on confirm */ +void nf_nat_follow_master(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + struct nf_nat_range range; + + /* This must be a fresh one. */ + BUG_ON(ct->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.dst.u3; + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); + range.min_proto = range.max_proto = exp->saved_proto; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.src.u3; + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); +} +EXPORT_SYMBOL(nf_nat_follow_master); diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c new file mode 100644 index 000000000000..9baaf734c142 --- /dev/null +++ b/net/netfilter/nf_nat_proto_common.c @@ -0,0 +1,112 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * (C) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) +{ + __be16 port; + + if (maniptype == NF_NAT_MANIP_SRC) + port = tuple->src.u.all; + else + port = tuple->dst.u.all; + + return ntohs(port) >= ntohs(min->all) && + ntohs(port) <= ntohs(max->all); +} +EXPORT_SYMBOL_GPL(nf_nat_l4proto_in_range); + +void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct, + u16 *rover) +{ + unsigned int range_size, min, i; + __be16 *portptr; + u_int16_t off; + + if (maniptype == NF_NAT_MANIP_SRC) + portptr = &tuple->src.u.all; + else + portptr = &tuple->dst.u.all; + + /* If no range specified... */ + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { + /* If it's dst rewrite, can't change port */ + if (maniptype == NF_NAT_MANIP_DST) + return; + + if (ntohs(*portptr) < 1024) { + /* Loose convention: >> 512 is credential passing */ + if (ntohs(*portptr) < 512) { + min = 1; + range_size = 511 - min + 1; + } else { + min = 600; + range_size = 1023 - min + 1; + } + } else { + min = 1024; + range_size = 65535 - 1024 + 1; + } + } else { + min = ntohs(range->min_proto.all); + range_size = ntohs(range->max_proto.all) - min + 1; + } + + if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) + off = l3proto->secure_port(tuple, maniptype == NF_NAT_MANIP_SRC + ? tuple->dst.u.all + : tuple->src.u.all); + else + off = *rover; + + for (i = 0; ; ++off) { + *portptr = htons(min + off % range_size); + if (++i != range_size && nf_nat_used_tuple(tuple, ct)) + continue; + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) + *rover = off; + return; + } + return; +} +EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple); + +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range *range) +{ + if (tb[CTA_PROTONAT_PORT_MIN]) { + range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); + range->max_proto.all = range->min_proto.all; + range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + if (tb[CTA_PROTONAT_PORT_MAX]) { + range->max_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); + range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + return 0; +} +EXPORT_SYMBOL_GPL(nf_nat_l4proto_nlattr_to_range); +#endif diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c new file mode 100644 index 000000000000..c8be2cdac0bf --- /dev/null +++ b/net/netfilter/nf_nat_proto_dccp.c @@ -0,0 +1,116 @@ +/* + * DCCP NAT protocol helper + * + * Copyright (c) 2005, 2006, 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static u_int16_t dccp_port_rover; + +static void +dccp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, + &dccp_port_rover); +} + +static bool +dccp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct dccp_hdr *hdr; + __be16 *portptr, oldport, newport; + int hdrsize = 8; /* DCCP connection tracking guarantees this much */ + + if (skb->len >= hdroff + sizeof(struct dccp_hdr)) + hdrsize = sizeof(struct dccp_hdr); + + if (!skb_make_writable(skb, hdroff + hdrsize)) + return false; + + hdr = (struct dccp_hdr *)(skb->data + hdroff); + + if (maniptype == NF_NAT_MANIP_SRC) { + newport = tuple->src.u.dccp.port; + portptr = &hdr->dccph_sport; + } else { + newport = tuple->dst.u.dccp.port; + portptr = &hdr->dccph_dport; + } + + oldport = *portptr; + *portptr = newport; + + if (hdrsize < sizeof(*hdr)) + return true; + + l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum, + tuple, maniptype); + inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, + 0); + return true; +} + +static const struct nf_nat_l4proto nf_nat_l4proto_dccp = { + .l4proto = IPPROTO_DCCP, + .manip_pkt = dccp_manip_pkt, + .in_range = nf_nat_l4proto_in_range, + .unique_tuple = dccp_unique_tuple, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; + +static int __init nf_nat_proto_dccp_init(void) +{ + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_dccp); + if (err < 0) + goto err1; + err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_dccp); + if (err < 0) + goto err2; + return 0; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp); +err1: + return err; +} + +static void __exit nf_nat_proto_dccp_fini(void) +{ + nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_dccp); + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp); + +} + +module_init(nf_nat_proto_dccp_init); +module_exit(nf_nat_proto_dccp_fini); + +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("DCCP NAT protocol helper"); +MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c new file mode 100644 index 000000000000..e64faa5ca893 --- /dev/null +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include + +static u_int16_t nf_sctp_port_rover; + +static void +sctp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, + &nf_sctp_port_rover); +} + +static bool +sctp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct sk_buff *frag; + sctp_sctphdr_t *hdr; + __be32 crc32; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct sctphdr *)(skb->data + hdroff); + + if (maniptype == NF_NAT_MANIP_SRC) { + /* Get rid of src port */ + hdr->source = tuple->src.u.sctp.port; + } else { + /* Get rid of dst port */ + hdr->dest = tuple->dst.u.sctp.port; + } + + crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff); + skb_walk_frags(skb, frag) + crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag), + crc32); + crc32 = sctp_end_cksum(crc32); + hdr->checksum = crc32; + + return true; +} + +static const struct nf_nat_l4proto nf_nat_l4proto_sctp = { + .l4proto = IPPROTO_SCTP, + .manip_pkt = sctp_manip_pkt, + .in_range = nf_nat_l4proto_in_range, + .unique_tuple = sctp_unique_tuple, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; + +static int __init nf_nat_proto_sctp_init(void) +{ + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_sctp); + if (err < 0) + goto err1; + err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_sctp); + if (err < 0) + goto err2; + return 0; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp); +err1: + return err; +} + +static void __exit nf_nat_proto_sctp_exit(void) +{ + nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_sctp); + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp); +} + +module_init(nf_nat_proto_sctp_init); +module_exit(nf_nat_proto_sctp_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SCTP NAT protocol helper"); +MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c new file mode 100644 index 000000000000..83ec8a6e4c36 --- /dev/null +++ b/net/netfilter/nf_nat_proto_tcp.c @@ -0,0 +1,85 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static u16 tcp_port_rover; + +static void +tcp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, + &tcp_port_rover); +} + +static bool +tcp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct tcphdr *hdr; + __be16 *portptr, newport, oldport; + int hdrsize = 8; /* TCP connection tracking guarantees this much */ + + /* this could be a inner header returned in icmp packet; in such + cases we cannot update the checksum field since it is outside of + the 8 bytes of transport layer headers we are guaranteed */ + if (skb->len >= hdroff + sizeof(struct tcphdr)) + hdrsize = sizeof(struct tcphdr); + + if (!skb_make_writable(skb, hdroff + hdrsize)) + return false; + + hdr = (struct tcphdr *)(skb->data + hdroff); + + if (maniptype == NF_NAT_MANIP_SRC) { + /* Get rid of src port */ + newport = tuple->src.u.tcp.port; + portptr = &hdr->source; + } else { + /* Get rid of dst port */ + newport = tuple->dst.u.tcp.port; + portptr = &hdr->dest; + } + + oldport = *portptr; + *portptr = newport; + + if (hdrsize < sizeof(*hdr)) + return true; + + l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); + inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); + return true; +} + +const struct nf_nat_l4proto nf_nat_l4proto_tcp = { + .l4proto = IPPROTO_TCP, + .manip_pkt = tcp_manip_pkt, + .in_range = nf_nat_l4proto_in_range, + .unique_tuple = tcp_unique_tuple, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c new file mode 100644 index 000000000000..7df613fb34a2 --- /dev/null +++ b/net/netfilter/nf_nat_proto_udp.c @@ -0,0 +1,76 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static u16 udp_port_rover; + +static void +udp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, + &udp_port_rover); +} + +static bool +udp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct udphdr *hdr; + __be16 *portptr, newport; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + hdr = (struct udphdr *)(skb->data + hdroff); + + if (maniptype == NF_NAT_MANIP_SRC) { + /* Get rid of src port */ + newport = tuple->src.u.udp.port; + portptr = &hdr->source; + } else { + /* Get rid of dst port */ + newport = tuple->dst.u.udp.port; + portptr = &hdr->dest; + } + if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) { + l3proto->csum_update(skb, iphdroff, &hdr->check, + tuple, maniptype); + inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, + 0); + if (!hdr->check) + hdr->check = CSUM_MANGLED_0; + } + *portptr = newport; + return true; +} + +const struct nf_nat_l4proto nf_nat_l4proto_udp = { + .l4proto = IPPROTO_UDP, + .manip_pkt = udp_manip_pkt, + .in_range = nf_nat_l4proto_in_range, + .unique_tuple = udp_unique_tuple, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c new file mode 100644 index 000000000000..776a0d1317b1 --- /dev/null +++ b/net/netfilter/nf_nat_proto_udplite.c @@ -0,0 +1,106 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * (C) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +static u16 udplite_port_rover; + +static void +udplite_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, + &udplite_port_rover); +} + +static bool +udplite_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct udphdr *hdr; + __be16 *portptr, newport; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct udphdr *)(skb->data + hdroff); + + if (maniptype == NF_NAT_MANIP_SRC) { + /* Get rid of source port */ + newport = tuple->src.u.udp.port; + portptr = &hdr->source; + } else { + /* Get rid of dst port */ + newport = tuple->dst.u.udp.port; + portptr = &hdr->dest; + } + + l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); + inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0); + if (!hdr->check) + hdr->check = CSUM_MANGLED_0; + + *portptr = newport; + return true; +} + +static const struct nf_nat_l4proto nf_nat_l4proto_udplite = { + .l4proto = IPPROTO_UDPLITE, + .manip_pkt = udplite_manip_pkt, + .in_range = nf_nat_l4proto_in_range, + .unique_tuple = udplite_unique_tuple, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; + +static int __init nf_nat_proto_udplite_init(void) +{ + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_udplite); + if (err < 0) + goto err1; + err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_udplite); + if (err < 0) + goto err2; + return 0; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite); +err1: + return err; +} + +static void __exit nf_nat_proto_udplite_fini(void) +{ + nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_udplite); + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite); +} + +module_init(nf_nat_proto_udplite_init); +module_exit(nf_nat_proto_udplite_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("UDP-Lite NAT protocol helper"); +MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/netfilter/nf_nat_proto_unknown.c b/net/netfilter/nf_nat_proto_unknown.c new file mode 100644 index 000000000000..6e494d584412 --- /dev/null +++ b/net/netfilter/nf_nat_proto_unknown.c @@ -0,0 +1,54 @@ +/* The "unknown" protocol. This is what is used for protocols we + * don't understand. It's returned by ip_ct_find_proto(). + */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + +#include +#include +#include + +static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type manip_type, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) +{ + return true; +} + +static void unknown_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + /* Sorry: we can't help you; if it's not unique, we can't frob + * anything. + */ + return; +} + +static bool +unknown_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + return true; +} + +const struct nf_nat_l4proto nf_nat_l4proto_unknown = { + .manip_pkt = unknown_manip_pkt, + .in_range = unknown_in_range, + .unique_tuple = unknown_unique_tuple, +}; diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c new file mode 100644 index 000000000000..7521368a6034 --- /dev/null +++ b/net/netfilter/xt_nat.c @@ -0,0 +1,167 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * (C) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par) +{ + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + + if (mr->rangesize != 1) { + pr_info("%s: multiple ranges no longer supported\n", + par->target->name); + return -EINVAL; + } + return 0; +} + +static void xt_nat_convert_range(struct nf_nat_range *dst, + const struct nf_nat_ipv4_range *src) +{ + memset(&dst->min_addr, 0, sizeof(dst->min_addr)); + memset(&dst->max_addr, 0, sizeof(dst->max_addr)); + + dst->flags = src->flags; + dst->min_addr.ip = src->min_ip; + dst->max_addr.ip = src->max_ip; + dst->min_proto = src->min; + dst->max_proto = src->max; +} + +static unsigned int +xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_range range; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct != NULL && + (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY)); + + xt_nat_convert_range(&range, &mr->range[0]); + return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); +} + +static unsigned int +xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_range range; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct != NULL && + (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + + xt_nat_convert_range(&range, &mr->range[0]); + return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); +} + +static unsigned int +xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct != NULL && + (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY)); + + return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC); +} + +static unsigned int +xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct != NULL && + (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + + return nf_nat_setup_info(ct, range, NF_NAT_MANIP_DST); +} + +static struct xt_target xt_nat_target_reg[] __read_mostly = { + { + .name = "SNAT", + .revision = 0, + .checkentry = xt_nat_checkentry_v0, + .target = xt_snat_target_v0, + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), + .family = NFPROTO_IPV4, + .table = "nat", + .hooks = (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT), + .me = THIS_MODULE, + }, + { + .name = "DNAT", + .revision = 0, + .checkentry = xt_nat_checkentry_v0, + .target = xt_dnat_target_v0, + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), + .family = NFPROTO_IPV4, + .table = "nat", + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, + { + .name = "SNAT", + .revision = 1, + .target = xt_snat_target_v1, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT), + .me = THIS_MODULE, + }, + { + .name = "DNAT", + .revision = 1, + .target = xt_dnat_target_v1, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +}; + +static int __init xt_nat_init(void) +{ + return xt_register_targets(xt_nat_target_reg, + ARRAY_SIZE(xt_nat_target_reg)); +} + +static void __exit xt_nat_exit(void) +{ + xt_unregister_targets(xt_nat_target_reg, ARRAY_SIZE(xt_nat_target_reg)); +} + +module_init(xt_nat_init); +module_exit(xt_nat_exit); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_SNAT"); +MODULE_ALIAS("ipt_DNAT"); -- cgit v1.2.3 From 58a317f1061c894d2344c0b6a18ab4a64b69b815 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:12 +0200 Subject: netfilter: ipv6: add IPv6 NAT support Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink_conntrack.h | 2 + include/net/netfilter/nf_nat_l3proto.h | 5 + include/net/netfilter/nf_nat_l4proto.h | 1 + include/net/netns/ipv6.h | 1 + net/core/secure_seq.c | 1 + net/ipv6/netfilter/Kconfig | 12 + net/ipv6/netfilter/Makefile | 4 + net/ipv6/netfilter/ip6table_nat.c | 321 +++++++++++++++++++++++++ net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 37 ++- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 287 ++++++++++++++++++++++ net/ipv6/netfilter/nf_nat_proto_icmpv6.c | 90 +++++++ net/netfilter/nf_nat_core.c | 2 + net/netfilter/xt_nat.c | 3 + 13 files changed, 764 insertions(+), 2 deletions(-) create mode 100644 net/ipv6/netfilter/ip6table_nat.c create mode 100644 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c create mode 100644 net/ipv6/netfilter/nf_nat_proto_icmpv6.c (limited to 'include/net/netns') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 68920eab287c..43bfe3e1685b 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -147,6 +147,8 @@ enum ctattr_nat { CTA_NAT_V4_MAXIP, #define CTA_NAT_MAXIP CTA_NAT_V4_MAXIP CTA_NAT_PROTO, + CTA_NAT_V6_MINIP, + CTA_NAT_V6_MAXIP, __CTA_NAT_MAX }; #define CTA_NAT_MAX (__CTA_NAT_MAX - 1) diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h index beed96961fa7..bd3b97e02c82 100644 --- a/include/net/netfilter/nf_nat_l3proto.h +++ b/include/net/netfilter/nf_nat_l3proto.h @@ -43,5 +43,10 @@ extern int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum); +extern int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + unsigned int hdrlen); #endif /* _NF_NAT_L3PROTO_H */ diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h index 1f0a4f018fcf..24feb68d1bcc 100644 --- a/include/net/netfilter/nf_nat_l4proto.h +++ b/include/net/netfilter/nf_nat_l4proto.h @@ -51,6 +51,7 @@ extern const struct nf_nat_l4proto *__nf_nat_l4proto_find(u8 l3proto, u8 l4proto extern const struct nf_nat_l4proto nf_nat_l4proto_tcp; extern const struct nf_nat_l4proto nf_nat_l4proto_udp; extern const struct nf_nat_l4proto nf_nat_l4proto_icmp; +extern const struct nf_nat_l4proto nf_nat_l4proto_icmpv6; extern const struct nf_nat_l4proto nf_nat_l4proto_unknown; extern bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index df0a5456a3fd..0318104a9458 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -42,6 +42,7 @@ struct netns_ipv6 { #ifdef CONFIG_SECURITY struct xt_table *ip6table_security; #endif + struct xt_table *ip6table_nat; #endif struct rt6_info *ip6_null_entry; struct rt6_statistics *rt6_stats; diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 99b2596531bb..e61a8bb7fce7 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -76,6 +76,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, return hash[0]; } +EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 10135342799e..b27e0ad4b738 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,6 +25,18 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. +config NF_NAT_IPV6 + tristate "IPv6 NAT" + depends on NF_CONNTRACK_IPV6 + depends on NETFILTER_ADVANCED + select NF_NAT + help + The IPv6 NAT option allows masquerading, port forwarding and other + forms of full Network Address Port Translation. It is controlled by + the `nat' table in ip6tables, see the man page for ip6tables(8). + + To compile it as a module, choose M here. If unsure, say N. + config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 534d3f216f7b..76779376da4c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o +obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o # objects for l3 independent conntrack nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o @@ -15,6 +16,9 @@ nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o # l3 independent conntrack obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o +nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o +obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o + # defrag nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c new file mode 100644 index 000000000000..e418bd6350a4 --- /dev/null +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT + * funded by Astaro. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static const struct xt_table nf_nat_ipv6_table = { + .name = "nat", + .valid_hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + .af = NFPROTO_IPV6, +}; + +static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) +{ + /* Force range to this IP; let proto decide mapping for + * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). + */ + struct nf_nat_range range; + + range.flags = 0; + pr_debug("Allocating NULL binding for %p (%pI6)\n", ct, + HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? + &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 : + &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6); + + return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); +} + +static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + struct nf_conn *ct) +{ + struct net *net = nf_ct_net(ct); + unsigned int ret; + + ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat); + if (ret == NF_ACCEPT) { + if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) + ret = alloc_null_binding(ct, hooknum); + } + return ret; +} + +static unsigned int +nf_nat_ipv6_fn(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + struct nf_conn_nat *nat; + enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + __be16 frag_off; + int hdrlen; + u8 nexthdr; + + ct = nf_ct_get(skb, &ctinfo); + /* Can't track? It's not due to stress, or conntrack would + * have dropped it. Hence it's the user's responsibilty to + * packet filter it out, or implement conntrack/NAT for that + * protocol. 8) --RR + */ + if (!ct) + return NF_ACCEPT; + + /* Don't try to NAT if this packet is not conntracked */ + if (nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + nat = nfct_nat(ct); + if (!nat) { + /* NAT module was loaded late. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) { + pr_debug("failed to add NAT extension\n"); + return NF_ACCEPT; + } + } + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: + nexthdr = ipv6_hdr(skb)->nexthdr; + hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), + &nexthdr, &frag_off); + + if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { + if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, + hooknum, hdrlen)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ + case IP_CT_NEW: + /* Seen it before? This can happen for loopback, retrans, + * or local packets. + */ + if (!nf_nat_initialized(ct, maniptype)) { + unsigned int ret; + + ret = nf_nat_rule_find(skb, hooknum, in, out, ct); + if (ret != NF_ACCEPT) + return ret; + } else + pr_debug("Already setup manip %s for ct %p\n", + maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", + ct); + break; + + default: + /* ESTABLISHED */ + NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || + ctinfo == IP_CT_ESTABLISHED_REPLY); + } + + return nf_nat_packet(ct, ctinfo, hooknum, skb); +} + +static unsigned int +nf_nat_ipv6_in(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + struct in6_addr daddr = ipv6_hdr(skb)->daddr; + + ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) + skb_dst_drop(skb); + + return ret; +} + +static unsigned int +nf_nat_ipv6_out(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ +#ifdef CONFIG_XFRM + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; +#endif + unsigned int ret; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct ipv6hdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3) || + (ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all)) + if (nf_xfrm_me_harder(skb, AF_INET6) < 0) + ret = NF_DROP; + } +#endif + return ret; +} + +static unsigned int +nf_nat_ipv6_local_fn(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned int ret; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct ipv6hdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, + &ct->tuplehash[!dir].tuple.src.u3)) { + if (ip6_route_me_harder(skb)) + ret = NF_DROP; + } +#ifdef CONFIG_XFRM + else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) + if (nf_xfrm_me_harder(skb, AF_INET6)) + ret = NF_DROP; +#endif + } + return ret; +} + +static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv6_in, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP6_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv6_out, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP6_PRI_NAT_SRC, + }, + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv6_local_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP6_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv6_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP6_PRI_NAT_SRC, + }, +}; + +static int __net_init ip6table_nat_net_init(struct net *net) +{ + struct ip6t_replace *repl; + + repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table); + if (repl == NULL) + return -ENOMEM; + net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl); + kfree(repl); + if (IS_ERR(net->ipv6.ip6table_nat)) + return PTR_ERR(net->ipv6.ip6table_nat); + return 0; +} + +static void __net_exit ip6table_nat_net_exit(struct net *net) +{ + ip6t_unregister_table(net, net->ipv6.ip6table_nat); +} + +static struct pernet_operations ip6table_nat_net_ops = { + .init = ip6table_nat_net_init, + .exit = ip6table_nat_net_exit, +}; + +static int __init ip6table_nat_init(void) +{ + int err; + + err = register_pernet_subsys(&ip6table_nat_net_ops); + if (err < 0) + goto err1; + + err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); + if (err < 0) + goto err2; + return 0; + +err2: + unregister_pernet_subsys(&ip6table_nat_net_ops); +err1: + return err; +} + +static void __exit ip6table_nat_exit(void) +{ + nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); + unregister_pernet_subsys(&ip6table_nat_net_ops); +} + +module_init(ip6table_nat_init); +module_exit(ip6table_nat_exit); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index dcf6010f68d9..8860d23e61cf 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -142,6 +143,36 @@ static unsigned int ipv6_confirm(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned char pnum = ipv6_hdr(skb)->nexthdr; + int protoff; + __be16 frag_off; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct || ctinfo == IP_CT_RELATED_REPLY) + goto out; + + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, + &frag_off); + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { + pr_debug("proto header not found\n"); + goto out; + } + + /* adjust seqs for loopback traffic only in outgoing direction */ + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && + !nf_is_loopback_packet(skb)) { + typeof(nf_nat_seq_adjust_hook) seq_adjust; + + seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); + if (!seq_adjust || + !seq_adjust(skb, ct, ctinfo, protoff)) { + NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); + return NF_DROP; + } + } +out: /* We've seen it coming out the other side: confirm it */ return nf_conntrack_confirm(skb); } @@ -170,12 +201,14 @@ static unsigned int __ipv6_conntrack_in(struct net *net, } /* Conntrack helpers need the entire reassembled packet in the - * POST_ROUTING hook. + * POST_ROUTING hook. In case of unconfirmed connections NAT + * might reassign a helper, so the entire packet is also + * required. */ ct = nf_ct_get(reasm, &ctinfo); if (ct != NULL && !nf_ct_is_untracked(ct)) { help = nfct_help(ct); - if (help && help->helper) { + if ((help && help->helper) || !nf_ct_is_confirmed(ct)) { nf_conntrack_get_reasm(skb); NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, (struct net_device *)in, diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c new file mode 100644 index 000000000000..81a2d1c3da8e --- /dev/null +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of IPv6 NAT funded by Astaro. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv6; + +#ifdef CONFIG_XFRM +static void nf_nat_ipv6_decode_session(struct sk_buff *skb, + const struct nf_conn *ct, + enum ip_conntrack_dir dir, + unsigned long statusbit, + struct flowi *fl) +{ + const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple; + struct flowi6 *fl6 = &fl->u.ip6; + + if (ct->status & statusbit) { + fl6->daddr = t->dst.u3.in6; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl6->fl6_dport = t->dst.u.all; + } + + statusbit ^= IPS_NAT_MASK; + + if (ct->status & statusbit) { + fl6->saddr = t->src.u3.in6; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl6->fl6_sport = t->src.u.all; + } +} +#endif + +static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t, + const struct nf_nat_range *range) +{ + return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 && + ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0; +} + +static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t, + __be16 dport) +{ + return secure_ipv6_port_ephemeral(t->src.u3.ip6, t->dst.u3.ip6, dport); +} + +static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb, + unsigned int iphdroff, + const struct nf_nat_l4proto *l4proto, + const struct nf_conntrack_tuple *target, + enum nf_nat_manip_type maniptype) +{ + struct ipv6hdr *ipv6h; + __be16 frag_off; + int hdroff; + u8 nexthdr; + + if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h))) + return false; + + ipv6h = (void *)skb->data + iphdroff; + nexthdr = ipv6h->nexthdr; + hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h), + &nexthdr, &frag_off); + if (hdroff < 0) + goto manip_addr; + + if ((frag_off & htons(~0x7)) == 0 && + !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff, + target, maniptype)) + return false; +manip_addr: + if (maniptype == NF_NAT_MANIP_SRC) + ipv6h->saddr = target->src.u3.in6; + else + ipv6h->daddr = target->dst.u3.in6; + + return true; +} + +static void nf_nat_ipv6_csum_update(struct sk_buff *skb, + unsigned int iphdroff, __sum16 *check, + const struct nf_conntrack_tuple *t, + enum nf_nat_manip_type maniptype) +{ + const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff); + const struct in6_addr *oldip, *newip; + + if (maniptype == NF_NAT_MANIP_SRC) { + oldip = &ipv6h->saddr; + newip = &t->src.u3.in6; + } else { + oldip = &ipv6h->daddr; + newip = &t->dst.u3.in6; + } + inet_proto_csum_replace16(check, skb, oldip->s6_addr32, + newip->s6_addr32, 1); +} + +static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, + u8 proto, void *data, __sum16 *check, + int datalen, int oldlen) +{ + const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + if (!(rt->rt6i_flags & RTF_LOCAL) && + (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + + skb_network_offset(skb) + + (data - (void *)skb->data); + skb->csum_offset = (void *)check - data; + *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + datalen, proto, 0); + } else { + *check = 0; + *check = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + datalen, proto, + csum_partial(data, datalen, + 0)); + if (proto == IPPROTO_UDP && !*check) + *check = CSUM_MANGLED_0; + } + } else + inet_proto_csum_replace2(check, skb, + htons(oldlen), htons(datalen), 1); +} + +static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range *range) +{ + if (tb[CTA_NAT_V6_MINIP]) { + nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP], + sizeof(struct in6_addr)); + range->flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (tb[CTA_NAT_V6_MAXIP]) + nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP], + sizeof(struct in6_addr)); + else + range->max_addr = range->min_addr; + + return 0; +} + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = { + .l3proto = NFPROTO_IPV6, + .secure_port = nf_nat_ipv6_secure_port, + .in_range = nf_nat_ipv6_in_range, + .manip_pkt = nf_nat_ipv6_manip_pkt, + .csum_update = nf_nat_ipv6_csum_update, + .csum_recalc = nf_nat_ipv6_csum_recalc, + .nlattr_to_range = nf_nat_ipv6_nlattr_to_range, +#ifdef CONFIG_XFRM + .decode_session = nf_nat_ipv6_decode_session, +#endif +}; + +int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + unsigned int hdrlen) +{ + struct { + struct icmp6hdr icmp6; + struct ipv6hdr ip6; + } *inside; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); + const struct nf_nat_l4proto *l4proto; + struct nf_conntrack_tuple target; + unsigned long statusbit; + + NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY); + + if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) + return 0; + if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6)) + return 0; + + inside = (void *)skb->data + hdrlen; + if (inside->icmp6.icmp6_type == NDISC_REDIRECT) { + if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) + return 0; + if (ct->status & IPS_NAT_MASK) + return 0; + } + + if (manip == NF_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; + + /* Invert if this is reply direction */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; + + if (!(ct->status & statusbit)) + return 1; + + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr); + if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6), + l4proto, &ct->tuplehash[!dir].tuple, !manip)) + return 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + inside = (void *)skb->data + hdrlen; + inside->icmp6.icmp6_cksum = 0; + inside->icmp6.icmp6_cksum = + csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + skb->len - hdrlen, IPPROTO_ICMPV6, + csum_partial(&inside->icmp6, + skb->len - hdrlen, 0)); + } + + nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6); + if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip)) + return 0; + + return 1; +} +EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); + +static int __init nf_nat_l3proto_ipv6_init(void) +{ + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6); + if (err < 0) + goto err1; + err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6); + if (err < 0) + goto err2; + return err; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6); +err1: + return err; +} + +static void __exit nf_nat_l3proto_ipv6_exit(void) +{ + nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6); + nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6); +} + +MODULE_LICENSE("GPL"); +MODULE_ALIAS("nf-nat-" __stringify(AF_INET6)); + +module_init(nf_nat_l3proto_ipv6_init); +module_exit(nf_nat_l3proto_ipv6_exit); diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c new file mode 100644 index 000000000000..5d6da784305b --- /dev/null +++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2011 Patrick Mchardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 ICMP NAT code. Development of IPv6 + * NAT funded by Astaro. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +static bool +icmpv6_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) +{ + return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) && + ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); +} + +static void +icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + static u16 id; + unsigned int range_size; + unsigned int i; + + range_size = ntohs(range->max_proto.icmp.id) - + ntohs(range->min_proto.icmp.id) + 1; + + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) + range_size = 0xffff; + + for (i = 0; ; ++id) { + tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) + + (id % range_size)); + if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) + return; + } +} + +static bool +icmpv6_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct icmp6hdr *hdr; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct icmp6hdr *)(skb->data + hdroff); + l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum, + tuple, maniptype); + if (hdr->icmp6_code == ICMPV6_ECHO_REQUEST || + hdr->icmp6_code == ICMPV6_ECHO_REPLY) { + inet_proto_csum_replace2(&hdr->icmp6_cksum, skb, + hdr->icmp6_identifier, + tuple->src.u.icmp.id, 0); + hdr->icmp6_identifier = tuple->src.u.icmp.id; + } + return true; +} + +const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = { + .l4proto = IPPROTO_ICMPV6, + .manip_pkt = icmpv6_manip_pkt, + .in_range = icmpv6_in_range, + .unique_tuple = icmpv6_unique_tuple, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index c577b753fb9a..29d445235199 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -696,6 +696,8 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr, static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { [CTA_NAT_V4_MINIP] = { .type = NLA_U32 }, [CTA_NAT_V4_MAXIP] = { .type = NLA_U32 }, + [CTA_NAT_V6_MINIP] = { .len = sizeof(struct in6_addr) }, + [CTA_NAT_V6_MAXIP] = { .len = sizeof(struct in6_addr) }, [CTA_NAT_PROTO] = { .type = NLA_NESTED }, }; diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c index 7521368a6034..81aafa8e4fef 100644 --- a/net/netfilter/xt_nat.c +++ b/net/netfilter/xt_nat.c @@ -163,5 +163,8 @@ module_init(xt_nat_init); module_exit(xt_nat_exit); MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); MODULE_ALIAS("ipt_SNAT"); MODULE_ALIAS("ipt_DNAT"); +MODULE_ALIAS("ip6t_SNAT"); +MODULE_ALIAS("ip6t_DNAT"); -- cgit v1.2.3 From b42664f898c976247f7f609b8bb9c94d7475ca10 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 10 Sep 2012 22:09:44 +0000 Subject: netns: move net->ipv4.rt_genid to net->rt_genid This commit prepares the use of rt_genid by both IPv4 and IPv6. Initialization is left in IPv4 part. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/net_namespace.h | 10 ++++++++++ include/net/netns/ipv4.h | 1 - net/ipv4/route.c | 9 ++------- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include/net/netns') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index ae1cd6c9ba52..fd87963a0ea5 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -102,6 +102,7 @@ struct net { #endif struct netns_ipvs *ipvs; struct sock *diag_nlsk; + atomic_t rt_genid; }; @@ -300,5 +301,14 @@ static inline void unregister_net_sysctl_table(struct ctl_table_header *header) } #endif +static inline int rt_genid(struct net *net) +{ + return atomic_read(&net->rt_genid); +} + +static inline void rt_genid_bump(struct net *net) +{ + atomic_inc(&net->rt_genid); +} #endif /* __NET_NET_NAMESPACE_H */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 1474dd65c66f..eb24dbccd81e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -65,7 +65,6 @@ struct netns_ipv4 { unsigned int sysctl_ping_group_range[2]; long sysctl_tcp_mem[3]; - atomic_t rt_genid; atomic_t dev_addr_genid; #ifdef CONFIG_IP_MROUTE diff --git a/net/ipv4/route.c b/net/ipv4/route.c index be27cfa96e88..fd9af60397b5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -202,11 +202,6 @@ EXPORT_SYMBOL(ip_tos2prio); static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) -static inline int rt_genid(struct net *net) -{ - return atomic_read(&net->ipv4.rt_genid); -} - #ifdef CONFIG_PROC_FS static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) { @@ -449,7 +444,7 @@ static inline bool rt_is_expired(const struct rtable *rth) void rt_cache_flush(struct net *net) { - atomic_inc(&net->ipv4.rt_genid); + rt_genid_bump(net); } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, @@ -2506,7 +2501,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { static __net_init int rt_genid_init(struct net *net) { - atomic_set(&net->ipv4.rt_genid, 0); + atomic_set(&net->rt_genid, 0); get_random_bytes(&net->ipv4.dev_addr_genid, sizeof(net->ipv4.dev_addr_genid)); return 0; -- cgit v1.2.3 From c038a767cd697238b09f7a4ea5a504b4891774e9 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 18 Sep 2012 16:50:08 +0000 Subject: ipv6: add a new namespace for nf_conntrack_reasm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As pointed by Michal, it is necessary to add a new namespace for nf_conntrack_reasm code, this prepares for the second patch. Cc: Herbert Xu Cc: Michal Kubeček Cc: David Miller Cc: Patrick McHardy Cc: Pablo Neira Ayuso Cc: netfilter-devel@vger.kernel.org Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/net_namespace.h | 3 + include/net/netns/ipv6.h | 8 ++ net/ipv6/netfilter/nf_conntrack_reasm.c | 137 ++++++++++++++++++++++---------- 3 files changed, 106 insertions(+), 42 deletions(-) (limited to 'include/net/netns') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 5ae57f1ab755..d61e2b36d9e3 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -92,6 +92,9 @@ struct net { struct netns_xt xt; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; +#endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + struct netns_nf_frag nf_frag; #endif struct sock *nfnl; struct sock *nfnl_stash; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 0318104a9458..214cb0a53359 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -71,4 +71,12 @@ struct netns_ipv6 { #endif #endif }; + +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) +struct netns_nf_frag { + struct netns_sysctl_ipv6 sysctl; + struct netns_frags frags; +}; +#endif + #endif diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index f94fb3ac2a79..f40f327ccc0c 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -71,27 +71,26 @@ struct nf_ct_frag6_queue }; static struct inet_frags nf_frags; -static struct netns_frags nf_init_frags; #ifdef CONFIG_SYSCTL static struct ctl_table nf_ct_frag6_sysctl_table[] = { { .procname = "nf_conntrack_frag6_timeout", - .data = &nf_init_frags.timeout, + .data = &init_net.nf_frag.frags.timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_frag6_low_thresh", - .data = &nf_init_frags.low_thresh, + .data = &init_net.nf_frag.frags.low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "nf_conntrack_frag6_high_thresh", - .data = &nf_init_frags.high_thresh, + .data = &init_net.nf_frag.frags.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, @@ -99,7 +98,55 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = { { } }; -static struct ctl_table_header *nf_ct_frag6_sysctl_header; +static int __net_init nf_ct_frag6_sysctl_register(struct net *net) +{ + struct ctl_table *table; + struct ctl_table_header *hdr; + + table = nf_ct_frag6_sysctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(nf_ct_frag6_sysctl_table), + GFP_KERNEL); + if (table == NULL) + goto err_alloc; + + table[0].data = &net->ipv6.frags.high_thresh; + table[1].data = &net->ipv6.frags.low_thresh; + table[2].data = &net->ipv6.frags.timeout; + } + + hdr = register_net_sysctl(net, "net/netfilter", table); + if (hdr == NULL) + goto err_reg; + + net->ipv6.sysctl.frags_hdr = hdr; + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) +{ + struct ctl_table *table; + + table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +#else +static int __net_init nf_ct_frag6_sysctl_register(struct net *net) +{ + return 0; +} +static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) +{ +} #endif static unsigned int nf_hashfn(struct inet_frag_queue *q) @@ -131,13 +178,6 @@ static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) inet_frag_kill(&fq->q, &nf_frags); } -static void nf_ct_frag6_evictor(void) -{ - local_bh_disable(); - inet_frag_evictor(&nf_init_frags, &nf_frags); - local_bh_enable(); -} - static void nf_ct_frag6_expire(unsigned long data) { struct nf_ct_frag6_queue *fq; @@ -158,9 +198,9 @@ out: } /* Creation primitives. */ - -static __inline__ struct nf_ct_frag6_queue * -fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) +static inline struct nf_ct_frag6_queue *fq_find(struct net *net, __be32 id, + u32 user, struct in6_addr *src, + struct in6_addr *dst) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -174,7 +214,7 @@ fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) read_lock_bh(&nf_frags.lock); hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); - q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); + q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); local_bh_enable(); if (q == NULL) goto oom; @@ -312,7 +352,7 @@ found: fq->q.meat += skb->len; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - atomic_add(skb->truesize, &nf_init_frags.mem); + atomic_add(skb->truesize, &fq->q.net->mem); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -322,7 +362,7 @@ found: fq->q.last_in |= INET_FRAG_FIRST_IN; } write_lock(&nf_frags.lock); - list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list); + list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); write_unlock(&nf_frags.lock); return 0; @@ -391,7 +431,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - atomic_add(clone->truesize, &nf_init_frags.mem); + atomic_add(clone->truesize, &fq->q.net->mem); } /* We have to remove fragment header from datagram and to relocate @@ -415,7 +455,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - atomic_sub(head->truesize, &nf_init_frags.mem); + atomic_sub(head->truesize, &fq->q.net->mem); head->local_df = 1; head->next = NULL; @@ -527,6 +567,8 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) { struct sk_buff *clone; struct net_device *dev = skb->dev; + struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev) + : dev_net(skb->dev); struct frag_hdr *fhdr; struct nf_ct_frag6_queue *fq; struct ipv6hdr *hdr; @@ -560,10 +602,13 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) hdr = ipv6_hdr(clone); fhdr = (struct frag_hdr *)skb_transport_header(clone); - if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh) - nf_ct_frag6_evictor(); + if (atomic_read(&net->nf_frag.frags.mem) > net->nf_frag.frags.high_thresh) { + local_bh_disable(); + inet_frag_evictor(&net->nf_frag.frags, &nf_frags); + local_bh_enable(); + } - fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr); + fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); goto ret_orig; @@ -621,8 +666,31 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, nf_conntrack_put_reasm(skb); } +static int nf_ct_net_init(struct net *net) +{ + net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; + net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; + inet_frags_init_net(&net->nf_frag.frags); + + return nf_ct_frag6_sysctl_register(net); +} + +static void nf_ct_net_exit(struct net *net) +{ + nf_ct_frags6_sysctl_unregister(net); + inet_frags_exit_net(&net->nf_frag.frags, &nf_frags); +} + +static struct pernet_operations nf_ct_net_ops = { + .init = nf_ct_net_init, + .exit = nf_ct_net_exit, +}; + int nf_ct_frag6_init(void) { + int ret = 0; + nf_frags.hashfn = nf_hashfn; nf_frags.constructor = ip6_frag_init; nf_frags.destructor = NULL; @@ -631,32 +699,17 @@ int nf_ct_frag6_init(void) nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; nf_frags.secret_interval = 10 * 60 * HZ; - nf_init_frags.timeout = IPV6_FRAG_TIMEOUT; - nf_init_frags.high_thresh = IPV6_FRAG_HIGH_THRESH; - nf_init_frags.low_thresh = IPV6_FRAG_LOW_THRESH; - inet_frags_init_net(&nf_init_frags); inet_frags_init(&nf_frags); -#ifdef CONFIG_SYSCTL - nf_ct_frag6_sysctl_header = register_net_sysctl(&init_net, "net/netfilter", - nf_ct_frag6_sysctl_table); - if (!nf_ct_frag6_sysctl_header) { + ret = register_pernet_subsys(&nf_ct_net_ops); + if (ret) inet_frags_fini(&nf_frags); - return -ENOMEM; - } -#endif - return 0; + return ret; } void nf_ct_frag6_cleanup(void) { -#ifdef CONFIG_SYSCTL - unregister_net_sysctl_table(nf_ct_frag6_sysctl_header); - nf_ct_frag6_sysctl_header = NULL; -#endif + unregister_pernet_subsys(&nf_ct_net_ops); inet_frags_fini(&nf_frags); - - nf_init_frags.low_thresh = 0; - nf_ct_frag6_evictor(); } -- cgit v1.2.3