From 496053f488fc2d859e41574f3421993826d2d0eb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 11 Jan 2012 16:46:32 -0800 Subject: ipv4: Remove bogus checks of rt_gateway being zero. It can never actually happen. rt_gateway is either the fully resolved flow lookup key's destination address, or the non-zero FIB entry gateway address. Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 8 +++----- net/ipv4/ipip.c | 2 -- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2b53a1f7abf6..fc213350a6ed 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -724,11 +724,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev goto tx_error; } - if (skb->protocol == htons(ETH_P_IP)) { + if (skb->protocol == htons(ETH_P_IP)) rt = skb_rtable(skb); - if ((dst = rt->rt_gateway) == 0) - goto tx_error_icmp; - } #if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) { struct neighbour *neigh = dst_get_neighbour_noref(skb_dst(skb)); @@ -910,9 +907,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev __IPTUNNEL_XMIT(tstats, &dev->stats); return NETDEV_TX_OK; +#if IS_ENABLED(CONFIG_IPV6) tx_error_icmp: dst_link_failure(skb); - +#endif tx_error: dev->stats.tx_errors++; dev_kfree_skb(skb); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 22a199315309..87c63b67f8ce 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -454,8 +454,6 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_fifo_errors++; goto tx_error; } - if ((dst = rt->rt_gateway) == 0) - goto tx_error_icmp; } rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, -- cgit v1.2.3 From 658c8d964eb3cdb7e4230a59ba09c75a3359ee4a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 24 Jan 2012 18:18:05 -0500 Subject: ipip: Fix bug added to ipip_tunnel_xmit(). We can remove the rt_gateway == 0 check but we shouldn't remove the 'dst' initialization too. Noticed by Eric Dumazet. Signed-off-by: David S. Miller --- net/ipv4/ipip.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv4') diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 87c63b67f8ce..f84ebff5cdb0 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -454,6 +454,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_fifo_errors++; goto tx_error; } + dst = rt->rt_gateway; } rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, -- cgit v1.2.3 From 61d57f87f3fb04a305f22befabd042ffbec8b852 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 24 Jan 2012 18:23:30 -0500 Subject: ip_gre: Fix bug added to ipgre_tunnel_xmit(). We can remove the rt_gateway == 0 check but we shouldn't remove the 'dst' initialization too. Noticed by Eric Dumazet. Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index fc213350a6ed..05f7419ed7c5 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -724,8 +724,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev goto tx_error; } - if (skb->protocol == htons(ETH_P_IP)) + if (skb->protocol == htons(ETH_P_IP)) { rt = skb_rtable(skb); + dst = rt->rt_gateway; + } #if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) { struct neighbour *neigh = dst_get_neighbour_noref(skb_dst(skb)); -- cgit v1.2.3 From 09e9b813d34d9a09d64a64580a9959d8bae1f4f5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Jan 2012 04:44:20 +0000 Subject: tcp: add LINUX_MIB_TCPRETRANSFAIL counter It might be useful to get a counter of failed tcp_retransmit_skb() calls. Reported-by: Satoru Moriya Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/snmp.h | 1 + net/ipv4/proc.c | 1 + net/ipv4/tcp_output.c | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index c1241c428179..8ee8af4e6da9 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -232,6 +232,7 @@ enum LINUX_MIB_TCPTIMEWAITOVERFLOW, /* TCPTimeWaitOverflow */ LINUX_MIB_TCPREQQFULLDOCOOKIES, /* TCPReqQFullDoCookies */ LINUX_MIB_TCPREQQFULLDROP, /* TCPReqQFullDrop */ + LINUX_MIB_TCPRETRANSFAIL, /* TCPRetransFail */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 6afc807ee2ad..02d61079f08b 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -256,6 +256,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPTimeWaitOverflow", LINUX_MIB_TCPTIMEWAITOVERFLOW), SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES), SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP), + SNMP_MIB_ITEM("TCPRetransFail", LINUX_MIB_TCPRETRANSFAIL), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8c8de2780c7a..561550ab3c30 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2308,8 +2308,10 @@ begin_fwd: if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) continue; - if (tcp_retransmit_skb(sk, skb)) + if (tcp_retransmit_skb(sk, skb)) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); return; + } NET_INC_STATS_BH(sock_net(sk), mib_idx); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery) -- cgit v1.2.3 From 39232973b779ab0c02cb6dcd8f819b7cb0fcd09a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 26 Jan 2012 15:22:32 -0500 Subject: ipv4/ipv6: Prepare for new route gateway semantics. In the future the ipv4/ipv6 route gateway will take on two types of values: 1) INADDR_ANY/IN6ADDR_ANY, for local network routes, and in this case the neighbour must be obtained using the destination address in ipv4/ipv6 header as the lookup key. 2) Everything else, the actual nexthop route address. So if the gateway is not inaddr-any we use it, otherwise we must use the packet's destination address. Signed-off-by: David S. Miller --- net/ipv4/route.c | 5 +++++ net/ipv6/route.c | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index bcacf54e5418..4eeb8ce856e2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1117,10 +1117,15 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const vo static const __be32 inaddr_any = 0; struct net_device *dev = dst->dev; const __be32 *pkey = daddr; + const struct rtable *rt; struct neighbour *n; + rt = (const struct rtable *) dst; + if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) pkey = &inaddr_any; + else if (rt->rt_gateway) + pkey = (const __be32 *) &rt->rt_gateway; n = __ipv4_neigh_lookup(&arp_tbl, dev, *(__force u32 *)pkey); if (n) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8c2e3ab58f2a..7d7f30697ead 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -121,9 +121,23 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) return p; } +static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr) +{ + struct in6_addr *p = &rt->rt6i_gateway; + + if (p->s6_addr32[0] | p->s6_addr32[1] | + p->s6_addr32[2] | p->s6_addr32[3]) + return (const void *) p; + return daddr; +} + static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr) { - struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr); + struct rt6_info *rt = (struct rt6_info *) dst; + struct neighbour *n; + + daddr = choose_neigh_daddr(rt, daddr); + n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr); if (n) return n; return neigh_create(&nd_tbl, daddr, dst->dev); -- cgit v1.2.3 From 0ec88662041e172acf33d7a15a2020841ee82afb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 27 Jan 2012 15:01:08 -0800 Subject: ipv4: ip_gre: Convert to dst_neigh_lookup() The conversion is very similar to that made to ipv6's SIT code. Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 05f7419ed7c5..bf9541648b96 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -730,10 +730,12 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } #if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) { - struct neighbour *neigh = dst_get_neighbour_noref(skb_dst(skb)); const struct in6_addr *addr6; + struct neighbour *neigh; + bool do_tx_error_icmp; int addr_type; + neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr); if (neigh == NULL) goto tx_error; @@ -746,9 +748,14 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } if ((addr_type & IPV6_ADDR_COMPATv4) == 0) + do_tx_error_icmp = true; + else { + do_tx_error_icmp = false; + dst = addr6->s6_addr32[3]; + } + neigh_release(neigh); + if (do_tx_error_icmp) goto tx_error_icmp; - - dst = addr6->s6_addr32[3]; } #endif else -- cgit v1.2.3 From a2d91241a80ec9bbc5ab24b9a2c4d730b3fa5730 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jan 2012 01:04:42 +0000 Subject: tcp: md5: remove obsolete md5_add() method We no longer use md5_add() method from struct tcp_sock_af_ops Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ---- net/ipv4/tcp_ipv4.c | 8 -------- net/ipv6/tcp_ipv6.c | 9 --------- 3 files changed, 21 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/tcp.h b/include/net/tcp.h index 0118ea999f67..3c903462adf5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1462,10 +1462,6 @@ struct tcp_sock_af_ops { const struct sock *sk, const struct request_sock *req, const struct sk_buff *skb); - int (*md5_add) (struct sock *sk, - struct sock *addr_sk, - u8 *newkey, - u8 len); int (*md5_parse) (struct sock *sk, char __user *optval, int optlen); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 337ba4cca052..345e24928fa6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -967,13 +967,6 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, } EXPORT_SYMBOL(tcp_v4_md5_do_add); -static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, - u8 *newkey, u8 newkeylen) -{ - return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr, - newkey, newkeylen); -} - int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) { struct tcp_sock *tp = tcp_sk(sk); @@ -1853,7 +1846,6 @@ EXPORT_SYMBOL(ipv4_specific); static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { .md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, - .md5_add = tcp_v4_md5_add_func, .md5_parse = tcp_v4_parse_md5_keys, }; #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3edd05ae4388..f37769ea6375 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -626,13 +626,6 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer, return 0; } -static int tcp_v6_md5_add_func(struct sock *sk, struct sock *addr_sk, - u8 *newkey, __u8 newkeylen) -{ - return tcp_v6_md5_do_add(sk, &inet6_sk(addr_sk)->daddr, - newkey, newkeylen); -} - static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer) { struct tcp_sock *tp = tcp_sk(sk); @@ -1898,7 +1891,6 @@ static const struct inet_connection_sock_af_ops ipv6_specific = { static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { .md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, - .md5_add = tcp_v6_md5_add_func, .md5_parse = tcp_v6_parse_md5_keys, }; #endif @@ -1930,7 +1922,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { .md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, - .md5_add = tcp_v6_md5_add_func, .md5_parse = tcp_v6_parse_md5_keys, }; #endif -- cgit v1.2.3 From a915da9b69273815527ccb3789421cb7027b545b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jan 2012 05:18:33 +0000 Subject: tcp: md5: rcu conversion In order to be able to support proper RST messages for TCP MD5 flows, we need to allow access to MD5 keys without locking listener socket. This conversion is a nice cleanup, and shrinks size of timewait sockets by 80 bytes. IPv6 code reuses generic code found in IPv4 instead of duplicating it. Control path uses GFP_KERNEL allocations instead of GFP_ATOMIC. Signed-off-by: Eric Dumazet Cc: Shawn Lu Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 +- include/net/tcp.h | 61 ++++++------- net/ipv4/tcp_ipv4.c | 227 +++++++++++++++++++---------------------------- net/ipv4/tcp_minisocks.c | 12 +-- net/ipv6/tcp_ipv6.c | 173 +++--------------------------------- 5 files changed, 141 insertions(+), 335 deletions(-) (limited to 'net/ipv4') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 46a85c9e1f25..c2025f159641 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -486,8 +486,7 @@ struct tcp_timewait_sock { u32 tw_ts_recent; long tw_ts_recent_stamp; #ifdef CONFIG_TCP_MD5SIG - u16 tw_md5_keylen; - u8 tw_md5_key[TCP_MD5SIG_MAXKEYLEN]; + struct tcp_md5sig_key *tw_md5_key; #endif /* Few sockets in timewait have cookies; in that case, then this * object holds a reference to them (tw_cookie_values->kref). diff --git a/include/net/tcp.h b/include/net/tcp.h index 3c903462adf5..10ae4c7b6b4f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1130,35 +1130,26 @@ static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) /* MD5 Signature */ struct crypto_hash; +union tcp_md5_addr { + struct in_addr a4; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr a6; +#endif +}; + /* - key database */ struct tcp_md5sig_key { - u8 *key; + struct hlist_node node; u8 keylen; -}; - -struct tcp4_md5sig_key { - struct tcp_md5sig_key base; - __be32 addr; -}; - -struct tcp6_md5sig_key { - struct tcp_md5sig_key base; -#if 0 - u32 scope_id; /* XXX */ -#endif - struct in6_addr addr; + u8 family; /* AF_INET or AF_INET6 */ + union tcp_md5_addr addr; + u8 key[TCP_MD5SIG_MAXKEYLEN]; + struct rcu_head rcu; }; /* - sock block */ struct tcp_md5sig_info { - struct tcp4_md5sig_key *keys4; -#if IS_ENABLED(CONFIG_IPV6) - struct tcp6_md5sig_key *keys6; - u32 entries6; - u32 alloced6; -#endif - u32 entries4; - u32 alloced4; + struct hlist_head head; }; /* - pseudo header */ @@ -1195,19 +1186,25 @@ extern int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, const struct sock *sk, const struct request_sock *req, const struct sk_buff *skb); -extern struct tcp_md5sig_key * tcp_v4_md5_lookup(struct sock *sk, - struct sock *addr_sk); -extern int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, u8 *newkey, - u8 newkeylen); -extern int tcp_v4_md5_do_del(struct sock *sk, __be32 addr); +extern int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, + int family, const u8 *newkey, + u8 newkeylen, gfp_t gfp); +extern int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, + int family); +extern struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, + struct sock *addr_sk); #ifdef CONFIG_TCP_MD5SIG -#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_keylen ? \ - &(struct tcp_md5sig_key) { \ - .key = (twsk)->tw_md5_key, \ - .keylen = (twsk)->tw_md5_keylen, \ - } : NULL) +extern struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, + const union tcp_md5_addr *addr, int family); +#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) #else +static inline struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, + const union tcp_md5_addr *addr, + int family) +{ + return NULL; +} #define tcp_twsk_md5_key(twsk) NULL #endif diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 345e24928fa6..1d5fd82c5c08 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -90,16 +90,8 @@ EXPORT_SYMBOL(sysctl_tcp_low_latency); #ifdef CONFIG_TCP_MD5SIG -static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, - __be32 addr); -static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, +static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, __be32 daddr, __be32 saddr, const struct tcphdr *th); -#else -static inline -struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) -{ - return NULL; -} #endif struct inet_hashinfo tcp_hashinfo; @@ -631,7 +623,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len = sizeof(rep.th); #ifdef CONFIG_TCP_MD5SIG - key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL; + key = sk ? tcp_md5_do_lookup(sk, + (union tcp_md5_addr *)&ip_hdr(skb)->saddr, + AF_INET) : NULL; if (key) { rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -759,7 +753,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, 0, - tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr), + tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, + AF_INET), inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, ip_hdr(skb)->tos); } @@ -876,146 +871,124 @@ static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk, */ /* Find the Key structure for an address. */ -static struct tcp_md5sig_key * - tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) +struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, + const union tcp_md5_addr *addr, + int family) { struct tcp_sock *tp = tcp_sk(sk); - int i; + struct tcp_md5sig_key *key; + struct hlist_node *pos; + unsigned int size = sizeof(struct in_addr); - if (!tp->md5sig_info || !tp->md5sig_info->entries4) + if (!tp->md5sig_info) return NULL; - for (i = 0; i < tp->md5sig_info->entries4; i++) { - if (tp->md5sig_info->keys4[i].addr == addr) - return &tp->md5sig_info->keys4[i].base; +#if IS_ENABLED(CONFIG_IPV6) + if (family == AF_INET6) + size = sizeof(struct in6_addr); +#endif + hlist_for_each_entry_rcu(key, pos, &tp->md5sig_info->head, node) { + if (key->family != family) + continue; + if (!memcmp(&key->addr, addr, size)) + return key; } return NULL; } +EXPORT_SYMBOL(tcp_md5_do_lookup); struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, struct sock *addr_sk) { - return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr); + union tcp_md5_addr *addr; + + addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr; + return tcp_md5_do_lookup(sk, addr, AF_INET); } EXPORT_SYMBOL(tcp_v4_md5_lookup); static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, struct request_sock *req) { - return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr); + union tcp_md5_addr *addr; + + addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr; + return tcp_md5_do_lookup(sk, addr, AF_INET); } /* This can be called on a newly created socket, from other files */ -int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, - u8 *newkey, u8 newkeylen) +int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, + int family, const u8 *newkey, u8 newkeylen, gfp_t gfp) { /* Add Key to the list */ struct tcp_md5sig_key *key; struct tcp_sock *tp = tcp_sk(sk); - struct tcp4_md5sig_key *keys; + struct tcp_md5sig_info *md5sig; - key = tcp_v4_md5_do_lookup(sk, addr); + key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); if (key) { /* Pre-existing entry - just update that one. */ - kfree(key->key); - key->key = newkey; + memcpy(key->key, newkey, newkeylen); key->keylen = newkeylen; - } else { - struct tcp_md5sig_info *md5sig; - - if (!tp->md5sig_info) { - tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), - GFP_ATOMIC); - if (!tp->md5sig_info) { - kfree(newkey); - return -ENOMEM; - } - sk_nocaps_add(sk, NETIF_F_GSO_MASK); - } + return 0; + } - md5sig = tp->md5sig_info; - if (md5sig->entries4 == 0 && - tcp_alloc_md5sig_pool(sk) == NULL) { - kfree(newkey); + md5sig = tp->md5sig_info; + if (!md5sig) { + md5sig = kmalloc(sizeof(*md5sig), gfp); + if (!md5sig) return -ENOMEM; - } - - if (md5sig->alloced4 == md5sig->entries4) { - keys = kmalloc((sizeof(*keys) * - (md5sig->entries4 + 1)), GFP_ATOMIC); - if (!keys) { - kfree(newkey); - if (md5sig->entries4 == 0) - tcp_free_md5sig_pool(); - return -ENOMEM; - } - if (md5sig->entries4) - memcpy(keys, md5sig->keys4, - sizeof(*keys) * md5sig->entries4); + sk_nocaps_add(sk, NETIF_F_GSO_MASK); + INIT_HLIST_HEAD(&md5sig->head); + tp->md5sig_info = md5sig; + } - /* Free old key list, and reference new one */ - kfree(md5sig->keys4); - md5sig->keys4 = keys; - md5sig->alloced4++; - } - md5sig->entries4++; - md5sig->keys4[md5sig->entries4 - 1].addr = addr; - md5sig->keys4[md5sig->entries4 - 1].base.key = newkey; - md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen; + key = kmalloc(sizeof(*key), gfp); + if (!key) + return -ENOMEM; + if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) { + kfree(key); + return -ENOMEM; } + + memcpy(key->key, newkey, newkeylen); + key->keylen = newkeylen; + key->family = family; + memcpy(&key->addr, addr, + (family == AF_INET6) ? sizeof(struct in6_addr) : + sizeof(struct in_addr)); + hlist_add_head_rcu(&key->node, &md5sig->head); return 0; } -EXPORT_SYMBOL(tcp_v4_md5_do_add); +EXPORT_SYMBOL(tcp_md5_do_add); -int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) +int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) { struct tcp_sock *tp = tcp_sk(sk); - int i; - - for (i = 0; i < tp->md5sig_info->entries4; i++) { - if (tp->md5sig_info->keys4[i].addr == addr) { - /* Free the key */ - kfree(tp->md5sig_info->keys4[i].base.key); - tp->md5sig_info->entries4--; - - if (tp->md5sig_info->entries4 == 0) { - kfree(tp->md5sig_info->keys4); - tp->md5sig_info->keys4 = NULL; - tp->md5sig_info->alloced4 = 0; - tcp_free_md5sig_pool(); - } else if (tp->md5sig_info->entries4 != i) { - /* Need to do some manipulation */ - memmove(&tp->md5sig_info->keys4[i], - &tp->md5sig_info->keys4[i+1], - (tp->md5sig_info->entries4 - i) * - sizeof(struct tcp4_md5sig_key)); - } - return 0; - } - } - return -ENOENT; + struct tcp_md5sig_key *key; + + key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); + if (!key) + return -ENOENT; + hlist_del_rcu(&key->node); + kfree_rcu(key, rcu); + if (hlist_empty(&tp->md5sig_info->head)) + tcp_free_md5sig_pool(); + return 0; } -EXPORT_SYMBOL(tcp_v4_md5_do_del); +EXPORT_SYMBOL(tcp_md5_do_del); -static void tcp_v4_clear_md5_list(struct sock *sk) +void tcp_clear_md5_list(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + struct tcp_md5sig_key *key; + struct hlist_node *pos, *n; - /* Free each key, then the set of key keys, - * the crypto element, and then decrement our - * hold on the last resort crypto. - */ - if (tp->md5sig_info->entries4) { - int i; - for (i = 0; i < tp->md5sig_info->entries4; i++) - kfree(tp->md5sig_info->keys4[i].base.key); - tp->md5sig_info->entries4 = 0; + if (!hlist_empty(&tp->md5sig_info->head)) tcp_free_md5sig_pool(); - } - if (tp->md5sig_info->keys4) { - kfree(tp->md5sig_info->keys4); - tp->md5sig_info->keys4 = NULL; - tp->md5sig_info->alloced4 = 0; + hlist_for_each_entry_safe(key, pos, n, &tp->md5sig_info->head, node) { + hlist_del_rcu(&key->node); + kfree_rcu(key, rcu); } } @@ -1024,7 +997,6 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, { struct tcp_md5sig cmd; struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; - u8 *newkey; if (optlen < sizeof(cmd)) return -EINVAL; @@ -1038,29 +1010,16 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, if (!cmd.tcpm_key || !cmd.tcpm_keylen) { if (!tcp_sk(sk)->md5sig_info) return -ENOENT; - return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr); + return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, + AF_INET); } if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; - if (!tcp_sk(sk)->md5sig_info) { - struct tcp_sock *tp = tcp_sk(sk); - struct tcp_md5sig_info *p; - - p = kzalloc(sizeof(*p), sk->sk_allocation); - if (!p) - return -EINVAL; - - tp->md5sig_info = p; - sk_nocaps_add(sk, NETIF_F_GSO_MASK); - } - - newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation); - if (!newkey) - return -ENOMEM; - return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr, - newkey, cmd.tcpm_keylen); + return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, + AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, + GFP_KERNEL); } static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, @@ -1086,7 +1045,7 @@ static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); } -static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, +static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, __be32 daddr, __be32 saddr, const struct tcphdr *th) { struct tcp_md5sig_pool *hp; @@ -1186,7 +1145,8 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) int genhash; unsigned char newhash[16]; - hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr); + hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr, + AF_INET); hash_location = tcp_parse_md5sig_option(th); /* We've parsed the options - do we have a hash? */ @@ -1474,7 +1434,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ - key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr); + key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr, + AF_INET); if (key != NULL) { /* * We're using one, so create a matching key @@ -1482,10 +1443,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * memory, then we end up not copying the key * across. Shucks. */ - char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); - if (newkey != NULL) - tcp_v4_md5_do_add(newsk, newinet->inet_daddr, - newkey, key->keylen); + tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr, + AF_INET, key->key, key->keylen, GFP_ATOMIC); sk_nocaps_add(newsk, NETIF_F_GSO_MASK); } #endif @@ -1934,7 +1893,7 @@ void tcp_v4_destroy_sock(struct sock *sk) #ifdef CONFIG_TCP_MD5SIG /* Clean up the MD5 key list, if any */ if (tp->md5sig_info) { - tcp_v4_clear_md5_list(sk); + tcp_clear_md5_list(sk); kfree(tp->md5sig_info); tp->md5sig_info = NULL; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 550e755747e0..3cabafb5cdd1 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -359,13 +359,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) */ do { struct tcp_md5sig_key *key; - memset(tcptw->tw_md5_key, 0, sizeof(tcptw->tw_md5_key)); - tcptw->tw_md5_keylen = 0; + tcptw->tw_md5_key = NULL; key = tp->af_specific->md5_lookup(sk, sk); if (key != NULL) { - memcpy(&tcptw->tw_md5_key, key->key, key->keylen); - tcptw->tw_md5_keylen = key->keylen; - if (tcp_alloc_md5sig_pool(sk) == NULL) + tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); + if (tcptw->tw_md5_key && tcp_alloc_md5sig_pool(sk) == NULL) BUG(); } } while (0); @@ -405,8 +403,10 @@ void tcp_twsk_destructor(struct sock *sk) { #ifdef CONFIG_TCP_MD5SIG struct tcp_timewait_sock *twsk = tcp_twsk(sk); - if (twsk->tw_md5_keylen) + if (twsk->tw_md5_key) { tcp_free_md5sig_pool(); + kfree_rcu(twsk->tw_md5_key, rcu); + } #endif } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f37769ea6375..bec41f9a6413 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -540,19 +540,7 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req) static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, const struct in6_addr *addr) { - struct tcp_sock *tp = tcp_sk(sk); - int i; - - BUG_ON(tp == NULL); - - if (!tp->md5sig_info || !tp->md5sig_info->entries6) - return NULL; - - for (i = 0; i < tp->md5sig_info->entries6; i++) { - if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, addr)) - return &tp->md5sig_info->keys6[i].base; - } - return NULL; + return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6); } static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk, @@ -567,129 +555,11 @@ static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk, return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr); } -static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer, - char *newkey, u8 newkeylen) -{ - /* Add key to the list */ - struct tcp_md5sig_key *key; - struct tcp_sock *tp = tcp_sk(sk); - struct tcp6_md5sig_key *keys; - - key = tcp_v6_md5_do_lookup(sk, peer); - if (key) { - /* modify existing entry - just update that one */ - kfree(key->key); - key->key = newkey; - key->keylen = newkeylen; - } else { - /* reallocate new list if current one is full. */ - if (!tp->md5sig_info) { - tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC); - if (!tp->md5sig_info) { - kfree(newkey); - return -ENOMEM; - } - sk_nocaps_add(sk, NETIF_F_GSO_MASK); - } - if (tp->md5sig_info->entries6 == 0 && - tcp_alloc_md5sig_pool(sk) == NULL) { - kfree(newkey); - return -ENOMEM; - } - if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) { - keys = kmalloc((sizeof (tp->md5sig_info->keys6[0]) * - (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC); - - if (!keys) { - kfree(newkey); - if (tp->md5sig_info->entries6 == 0) - tcp_free_md5sig_pool(); - return -ENOMEM; - } - - if (tp->md5sig_info->entries6) - memmove(keys, tp->md5sig_info->keys6, - (sizeof (tp->md5sig_info->keys6[0]) * - tp->md5sig_info->entries6)); - - kfree(tp->md5sig_info->keys6); - tp->md5sig_info->keys6 = keys; - tp->md5sig_info->alloced6++; - } - - tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr = *peer; - tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey; - tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen; - - tp->md5sig_info->entries6++; - } - return 0; -} - -static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer) -{ - struct tcp_sock *tp = tcp_sk(sk); - int i; - - for (i = 0; i < tp->md5sig_info->entries6; i++) { - if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, peer)) { - /* Free the key */ - kfree(tp->md5sig_info->keys6[i].base.key); - tp->md5sig_info->entries6--; - - if (tp->md5sig_info->entries6 == 0) { - kfree(tp->md5sig_info->keys6); - tp->md5sig_info->keys6 = NULL; - tp->md5sig_info->alloced6 = 0; - tcp_free_md5sig_pool(); - } else { - /* shrink the database */ - if (tp->md5sig_info->entries6 != i) - memmove(&tp->md5sig_info->keys6[i], - &tp->md5sig_info->keys6[i+1], - (tp->md5sig_info->entries6 - i) - * sizeof (tp->md5sig_info->keys6[0])); - } - return 0; - } - } - return -ENOENT; -} - -static void tcp_v6_clear_md5_list (struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - int i; - - if (tp->md5sig_info->entries6) { - for (i = 0; i < tp->md5sig_info->entries6; i++) - kfree(tp->md5sig_info->keys6[i].base.key); - tp->md5sig_info->entries6 = 0; - tcp_free_md5sig_pool(); - } - - kfree(tp->md5sig_info->keys6); - tp->md5sig_info->keys6 = NULL; - tp->md5sig_info->alloced6 = 0; - - if (tp->md5sig_info->entries4) { - for (i = 0; i < tp->md5sig_info->entries4; i++) - kfree(tp->md5sig_info->keys4[i].base.key); - tp->md5sig_info->entries4 = 0; - tcp_free_md5sig_pool(); - } - - kfree(tp->md5sig_info->keys4); - tp->md5sig_info->keys4 = NULL; - tp->md5sig_info->alloced4 = 0; -} - static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, int optlen) { struct tcp_md5sig cmd; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; - u8 *newkey; if (optlen < sizeof(cmd)) return -EINVAL; @@ -704,33 +574,21 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, if (!tcp_sk(sk)->md5sig_info) return -ENOENT; if (ipv6_addr_v4mapped(&sin6->sin6_addr)) - return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]); - return tcp_v6_md5_do_del(sk, &sin6->sin6_addr); + return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], + AF_INET); + return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, + AF_INET6); } if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; - if (!tcp_sk(sk)->md5sig_info) { - struct tcp_sock *tp = tcp_sk(sk); - struct tcp_md5sig_info *p; - - p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL); - if (!p) - return -ENOMEM; - - tp->md5sig_info = p; - sk_nocaps_add(sk, NETIF_F_GSO_MASK); - } + if (ipv6_addr_v4mapped(&sin6->sin6_addr)) + return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], + AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); - newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); - if (!newkey) - return -ENOMEM; - if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { - return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3], - newkey, cmd.tcpm_keylen); - } - return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen); + return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, + AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); } static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, @@ -1503,10 +1361,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * memory, then we end up not copying the key * across. Shucks. */ - char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); - if (newkey != NULL) - tcp_v6_md5_do_add(newsk, &newnp->daddr, - newkey, key->keylen); + tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr, + AF_INET6, key->key, key->keylen, GFP_ATOMIC); } #endif @@ -1995,11 +1851,6 @@ static int tcp_v6_init_sock(struct sock *sk) static void tcp_v6_destroy_sock(struct sock *sk) { -#ifdef CONFIG_TCP_MD5SIG - /* Clean up the MD5 key list */ - if (tcp_sk(sk)->md5sig_info) - tcp_v6_clear_md5_list(sk); -#endif tcp_v4_destroy_sock(sk); inet6_destroy_sock(sk); } -- cgit v1.2.3 From 5f3d9cb2962967d9d7e03abb4a7ca275a9a3fea5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jan 2012 10:56:48 +0000 Subject: tcp: md5: use sock_kmalloc() to limit md5 keys There is no limit on number of MD5 keys an application can attach to a tcp socket. This patch adds a per tcp socket limit based on /proc/sys/net/core/optmem_max With current default optmem_max values, this allows about 150 keys on 64bit arches, and 88 keys on 32bit arches. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1d5fd82c5c08..da5d3226771b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -943,11 +943,11 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, tp->md5sig_info = md5sig; } - key = kmalloc(sizeof(*key), gfp); + key = sock_kmalloc(sk, sizeof(*key), gfp); if (!key) return -ENOMEM; if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) { - kfree(key); + sock_kfree_s(sk, key, sizeof(*key)); return -ENOMEM; } @@ -971,6 +971,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) if (!key) return -ENOENT; hlist_del_rcu(&key->node); + atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); if (hlist_empty(&tp->md5sig_info->head)) tcp_free_md5sig_pool(); @@ -988,6 +989,7 @@ void tcp_clear_md5_list(struct sock *sk) tcp_free_md5sig_pool(); hlist_for_each_entry_safe(key, pos, n, &tp->md5sig_info->head, node) { hlist_del_rcu(&key->node); + atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); } } -- cgit v1.2.3 From a8afca032998850ec63e83d555cdcf0eb5680cd6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jan 2012 18:45:40 +0000 Subject: tcp: md5: protects md5sig_info with RCU This patch makes sure we use appropriate memory barriers before publishing tp->md5sig_info, allowing tcp_md5_do_lookup() being used from tcp_v4_send_reset() without holding socket lock (upcoming patch from Shawn Lu) Note we also need to respect rcu grace period before its freeing, since we can free socket without this grace period thanks to SLAB_DESTROY_BY_RCU Signed-off-by: Eric Dumazet Cc: Shawn Lu Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 32 ++++++++++++++++++++------------ net/ipv6/tcp_ipv6.c | 2 -- 4 files changed, 22 insertions(+), 15 deletions(-) (limited to 'net/ipv4') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c2025f159641..115389e9b945 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -463,7 +463,7 @@ struct tcp_sock { const struct tcp_sock_af_ops *af_specific; /* TCP MD5 Signature Option information */ - struct tcp_md5sig_info *md5sig_info; + struct tcp_md5sig_info __rcu *md5sig_info; #endif /* When the cookie options are generated and exchanged, then this diff --git a/include/net/tcp.h b/include/net/tcp.h index 10ae4c7b6b4f..78880ba0f560 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1150,6 +1150,7 @@ struct tcp_md5sig_key { /* - sock block */ struct tcp_md5sig_info { struct hlist_head head; + struct rcu_head rcu; }; /* - pseudo header */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index da5d3226771b..567cca9b30df 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -879,14 +879,18 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, struct tcp_md5sig_key *key; struct hlist_node *pos; unsigned int size = sizeof(struct in_addr); + struct tcp_md5sig_info *md5sig; - if (!tp->md5sig_info) + /* caller either holds rcu_read_lock() or socket lock */ + md5sig = rcu_dereference_check(tp->md5sig_info, + sock_owned_by_user(sk)); + if (!md5sig) return NULL; #if IS_ENABLED(CONFIG_IPV6) if (family == AF_INET6) size = sizeof(struct in6_addr); #endif - hlist_for_each_entry_rcu(key, pos, &tp->md5sig_info->head, node) { + hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) { if (key->family != family) continue; if (!memcmp(&key->addr, addr, size)) @@ -932,7 +936,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, return 0; } - md5sig = tp->md5sig_info; + md5sig = rcu_dereference_protected(tp->md5sig_info, + sock_owned_by_user(sk)); if (!md5sig) { md5sig = kmalloc(sizeof(*md5sig), gfp); if (!md5sig) @@ -940,7 +945,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, sk_nocaps_add(sk, NETIF_F_GSO_MASK); INIT_HLIST_HEAD(&md5sig->head); - tp->md5sig_info = md5sig; + rcu_assign_pointer(tp->md5sig_info, md5sig); } key = sock_kmalloc(sk, sizeof(*key), gfp); @@ -966,6 +971,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; + struct tcp_md5sig_info *md5sig; key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); if (!key) @@ -973,7 +979,9 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); - if (hlist_empty(&tp->md5sig_info->head)) + md5sig = rcu_dereference_protected(tp->md5sig_info, + sock_owned_by_user(sk)); + if (hlist_empty(&md5sig->head)) tcp_free_md5sig_pool(); return 0; } @@ -984,10 +992,13 @@ void tcp_clear_md5_list(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; struct hlist_node *pos, *n; + struct tcp_md5sig_info *md5sig; - if (!hlist_empty(&tp->md5sig_info->head)) + md5sig = rcu_dereference_protected(tp->md5sig_info, 1); + + if (!hlist_empty(&md5sig->head)) tcp_free_md5sig_pool(); - hlist_for_each_entry_safe(key, pos, n, &tp->md5sig_info->head, node) { + hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) { hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); @@ -1009,12 +1020,9 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, if (sin->sin_family != AF_INET) return -EINVAL; - if (!cmd.tcpm_key || !cmd.tcpm_keylen) { - if (!tcp_sk(sk)->md5sig_info) - return -ENOENT; + if (!cmd.tcpm_key || !cmd.tcpm_keylen) return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, AF_INET); - } if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; @@ -1896,7 +1904,7 @@ void tcp_v4_destroy_sock(struct sock *sk) /* Clean up the MD5 key list, if any */ if (tp->md5sig_info) { tcp_clear_md5_list(sk); - kfree(tp->md5sig_info); + kfree_rcu(tp->md5sig_info, rcu); tp->md5sig_info = NULL; } #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bec41f9a6413..c25018106ef2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -571,8 +571,6 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, return -EINVAL; if (!cmd.tcpm_keylen) { - if (!tcp_sk(sk)->md5sig_info) - return -ENOENT; if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], AF_INET); -- cgit v1.2.3 From 658ddaaf6694adf63f67451dec9ddeb87a7cb2d7 Mon Sep 17 00:00:00 2001 From: Shawn Lu Date: Tue, 31 Jan 2012 22:35:48 +0000 Subject: tcp: md5: RST: getting md5 key from listener TCP RST mechanism is broken in TCP md5(RFC2385). When connection is gone, md5 key is lost, sending RST without md5 hash is deem to ignored by peer. This can be a problem since RST help protocal like bgp to fast recove from peer crash. In most case, users of tcp md5, such as bgp and ldp, have listener on both sides to accept connection from peer. md5 keys for peers are saved in listening socket. There are two cases in finding md5 key when connection is lost: 1.Passive receive RST: The message is send to well known port, tcp will associate it with listner. md5 key is gotten from listener. 2.Active receive RST (no sock): The message is send to ative side, there is no socket associated with the message. In this case, finding listener from source port, then find md5 key from listener. we are not loosing sercuriy here: packet is checked with md5 hash. No RST is generated if md5 hash doesn't match or no md5 key can be found. Signed-off-by: Shawn Lu Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 45 ++++++++++++++++++++++++++++++++++++++++++--- net/ipv6/tcp_ipv6.c | 43 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 83 insertions(+), 5 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 567cca9b30df..90e47931e217 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -593,6 +593,10 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) struct ip_reply_arg arg; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; + const __u8 *hash_location = NULL; + unsigned char newhash[16]; + int genhash; + struct sock *sk1 = NULL; #endif struct net *net; @@ -623,9 +627,36 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len = sizeof(rep.th); #ifdef CONFIG_TCP_MD5SIG - key = sk ? tcp_md5_do_lookup(sk, - (union tcp_md5_addr *)&ip_hdr(skb)->saddr, - AF_INET) : NULL; + hash_location = tcp_parse_md5sig_option(th); + if (!sk && hash_location) { + /* + * active side is lost. Try to find listening socket through + * source port, and then find md5 key through listening socket. + * we are not loose security here: + * Incoming packet is checked with md5 hash with finding key, + * no RST generated if md5 hash doesn't match. + */ + sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev), + &tcp_hashinfo, ip_hdr(skb)->daddr, + ntohs(th->source), inet_iif(skb)); + /* don't send rst if it can't find key */ + if (!sk1) + return; + rcu_read_lock(); + key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *) + &ip_hdr(skb)->saddr, AF_INET); + if (!key) + goto release_sk1; + + genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb); + if (genhash || memcmp(hash_location, newhash, 16) != 0) + goto release_sk1; + } else { + key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *) + &ip_hdr(skb)->saddr, + AF_INET) : NULL; + } + if (key) { rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -653,6 +684,14 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); + +#ifdef CONFIG_TCP_MD5SIG +release_sk1: + if (sk1) { + rcu_read_unlock(); + sock_put(sk1); + } +#endif } /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c25018106ef2..d16414cb3421 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -923,6 +923,13 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) const struct tcphdr *th = tcp_hdr(skb); u32 seq = 0, ack_seq = 0; struct tcp_md5sig_key *key = NULL; +#ifdef CONFIG_TCP_MD5SIG + const __u8 *hash_location = NULL; + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + unsigned char newhash[16]; + int genhash; + struct sock *sk1 = NULL; +#endif if (th->rst) return; @@ -931,8 +938,32 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) return; #ifdef CONFIG_TCP_MD5SIG - if (sk) - key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr); + hash_location = tcp_parse_md5sig_option(th); + if (!sk && hash_location) { + /* + * active side is lost. Try to find listening socket through + * source port, and then find md5 key through listening socket. + * we are not loose security here: + * Incoming packet is checked with md5 hash with finding key, + * no RST generated if md5 hash doesn't match. + */ + sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev), + &tcp_hashinfo, &ipv6h->daddr, + ntohs(th->source), inet6_iif(skb)); + if (!sk1) + return; + + rcu_read_lock(); + key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr); + if (!key) + goto release_sk1; + + genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb); + if (genhash || memcmp(hash_location, newhash, 16) != 0) + goto release_sk1; + } else { + key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL; + } #endif if (th->ack) @@ -942,6 +973,14 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) (th->doff << 2); tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0); + +#ifdef CONFIG_TCP_MD5SIG +release_sk1: + if (sk1) { + rcu_read_unlock(); + sock_put(sk1); + } +#endif } static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, -- cgit v1.2.3 From 76e21053b5bf33a07c76f99d27a74238310e3c71 Mon Sep 17 00:00:00 2001 From: "Erich E. Hoover" Date: Wed, 8 Feb 2012 09:11:07 +0000 Subject: ipv4: Implement IP_UNICAST_IF socket option. The IP_UNICAST_IF feature is needed by the Wine project. This patch implements the feature by setting the outgoing interface in a similar fashion to that of IP_MULTICAST_IF. A separate option is needed to handle this feature since the existing options do not provide all of the characteristics required by IP_UNICAST_IF, a summary is provided below. SO_BINDTODEVICE: * SO_BINDTODEVICE requires administrative privileges, IP_UNICAST_IF does not. From reading some old mailing list articles my understanding is that SO_BINDTODEVICE requires administrative privileges because it can override the administrator's routing settings. * The SO_BINDTODEVICE option restricts both outbound and inbound traffic, IP_UNICAST_IF only impacts outbound traffic. IP_PKTINFO: * Since IP_PKTINFO and IP_UNICAST_IF are independent options, implementing IP_UNICAST_IF with IP_PKTINFO will likely break some applications. * Implementing IP_UNICAST_IF on top of IP_PKTINFO significantly complicates the Wine codebase and reduces the socket performance (doing this requires a lot of extra communication between the "server" and "user" layers). bind(): * bind() does not work on broadcast packets, IP_UNICAST_IF is specifically intended to work with broadcast packets. * Like SO_BINDTODEVICE, bind() restricts both outbound and inbound traffic. Signed-off-by: Erich E. Hoover Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/in.h | 1 + include/net/inet_sock.h | 2 ++ net/ipv4/ip_sockglue.c | 33 +++++++++++++++++++++++++++++++++ net/ipv4/ping.c | 3 ++- net/ipv4/raw.c | 3 ++- net/ipv4/udp.c | 3 ++- 6 files changed, 42 insertions(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/include/linux/in.h b/include/linux/in.h index 01129c0ea87c..e0337f11d92e 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -111,6 +111,7 @@ struct in_addr { #define MCAST_LEAVE_SOURCE_GROUP 47 #define MCAST_MSFILTER 48 #define IP_MULTICAST_ALL 49 +#define IP_UNICAST_IF 50 #define MCAST_EXCLUDE 0 #define MCAST_INCLUDE 1 diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index e3e405106afe..022f772c0ebe 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -132,6 +132,7 @@ struct rtable; * @tos - TOS * @mc_ttl - Multicasting TTL * @is_icsk - is this an inet_connection_sock? + * @uc_index - Unicast outgoing device index * @mc_index - Multicast device index * @mc_list - Group array * @cork - info to build ip hdr on each ip frag while socket is corked @@ -167,6 +168,7 @@ struct inet_sock { transparent:1, mc_all:1, nodefrag:1; + int uc_index; int mc_index; __be32 mc_addr; struct ip_mc_socklist __rcu *mc_list; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8aa87c19fa00..9125529dab95 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -469,6 +469,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<mc_loop = !!val; break; + case IP_UNICAST_IF: + { + struct net_device *dev = NULL; + int ifindex; + + if (optlen != sizeof(int)) + goto e_inval; + + ifindex = (__force int)ntohl((__force __be32)val); + if (ifindex == 0) { + inet->uc_index = 0; + err = 0; + break; + } + + dev = dev_get_by_index(sock_net(sk), ifindex); + err = -EADDRNOTAVAIL; + if (!dev) + break; + dev_put(dev); + + err = -EINVAL; + if (sk->sk_bound_dev_if) + break; + + inet->uc_index = ifindex; + err = 0; + break; + } case IP_MULTICAST_IF: { struct ip_mreqn mreq; @@ -1178,6 +1208,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_MULTICAST_LOOP: val = inet->mc_loop; break; + case IP_UNICAST_IF: + val = (__force int)htonl((__u32) inet->uc_index); + break; case IP_MULTICAST_IF: { struct in_addr addr; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index aea5a199c37a..cfc82cf339f6 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -556,7 +556,8 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.oif = inet->mc_index; if (!saddr) saddr = inet->mc_addr; - } + } else if (!ipc.oif) + ipc.oif = inet->uc_index; flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 3ccda5ae8a27..ab466305b629 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -563,7 +563,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.oif = inet->mc_index; if (!saddr) saddr = inet->mc_addr; - } + } else if (!ipc.oif) + ipc.oif = inet->uc_index; flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5d075b5f70fc..cd99f1a0f59f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -917,7 +917,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (!saddr) saddr = inet->mc_addr; connected = 0; - } + } else if (!ipc.oif) + ipc.oif = inet->uc_index; if (connected) rt = (struct rtable *)sk_dst_check(sk, 0); -- cgit v1.2.3 From 4c507d2897bd9be810b3403ade73b04cf6fdfd4a Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 9 Feb 2012 09:35:49 +0000 Subject: net: implement IP_RECVTOS for IP_PKTOPTIONS Currently, it is not easily possible to get TOS/DSCP value of packets from an incoming TCP stream. The mechanism is there, IP_PKTOPTIONS getsockopt with IP_RECVTOS set, the same way as incoming TTL can be queried. This is not actually implemented for TOS, though. This patch adds this functionality, both for IPv4 (IP_PKTOPTIONS) and IPv6 (IPV6_2292PKTOPTIONS). For IPv4, like in the IP_RECVTTL case, the value of the TOS field is stored from the other party's ACK. This is needed for proxies which require DSCP transparency. One such example is at http://zph.bratcheda.org/. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- include/net/inet_sock.h | 1 + net/ipv4/af_inet.c | 1 + net/ipv4/ip_sockglue.c | 4 ++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/ipv6_sockglue.c | 4 ++++ net/ipv6/tcp_ipv6.c | 4 ++++ 8 files changed, 17 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 4847a64d3c0a..8260ef779762 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -366,7 +366,7 @@ struct ipv6_pinfo { dontfrag:1; __u8 min_hopcount; __u8 tclass; - __u8 padding; + __u8 rcv_tclass; __u32 dst_cookie; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 022f772c0ebe..ae17e1352d7e 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -168,6 +168,7 @@ struct inet_sock { transparent:1, mc_all:1, nodefrag:1; + __u8 rcv_tos; int uc_index; int mc_index; __be32 mc_addr; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f7b5670744f0..e588a34e85c2 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -381,6 +381,7 @@ lookup_protocol: inet->mc_all = 1; inet->mc_index = 0; inet->mc_list = NULL; + inet->rcv_tos = 0; sk_refcnt_debug_inc(sk); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 9125529dab95..ca50d9f9f8c1 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1289,6 +1289,10 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, int hlim = inet->mc_ttl; put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); } + if (inet->cmsg_flags & IP_CMSG_TOS) { + int tos = inet->rcv_tos; + put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos); + } len -= msg.msg_controllen; return put_user(len, optlen); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4d6f81c818dc..94abee8cf563 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1463,6 +1463,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ireq->opt = NULL; newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; + newinet->rcv_tos = ip_hdr(skb)->tos; inet_csk(newsk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 273f48d1df2e..5605f9dca87e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -214,6 +214,7 @@ lookup_protocol: inet->mc_ttl = 1; inet->mc_index = 0; inet->mc_list = NULL; + inet->rcv_tos = 0; if (ipv4_config.no_pmtu_disc) inet->pmtudisc = IP_PMTUDISC_DONT; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 6d6b65fdaa1a..63dd1f89ed7d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1017,6 +1017,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, int hlim = np->mcast_hops; put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } + if (np->rxopt.bits.rxtclass) { + int tclass = np->rcv_tclass; + put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); + } if (np->rxopt.bits.rxoinfo) { struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d16414cb3421..12c6ece67f39 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1282,6 +1282,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; + newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count @@ -1360,6 +1361,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; + newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); /* Clone native IPv6 options from listening socket (if any) @@ -1562,6 +1564,8 @@ ipv6_pktoptions: np->mcast_oif = inet6_iif(opt_skb); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; + if (np->rxopt.bits.rxtclass) + np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); opt_skb = xchg(&np->pktoptions, opt_skb); -- cgit v1.2.3 From f2cedb63df14342ad40a8b5b324fc5d94a60b665 Mon Sep 17 00:00:00 2001 From: Danny Kukawka Date: Wed, 15 Feb 2012 06:45:39 +0000 Subject: net: replace random_ether_addr() with eth_hw_addr_random() Replace usage of random_ether_addr() with eth_hw_addr_random() to set addr_assign_type correctly to NET_ADDR_RANDOM. Change the trivial cases. v2: adapt to renamed eth_hw_addr_random() Signed-off-by: Danny Kukawka Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 2 +- drivers/net/ethernet/cirrus/ep93xx_eth.c | 2 +- drivers/net/ethernet/davicom/dm9000.c | 2 +- drivers/net/ethernet/dnet.c | 2 +- drivers/net/ethernet/faraday/ftgmac100.c | 2 +- drivers/net/ethernet/faraday/ftmac100.c | 2 +- drivers/net/ethernet/mipsnet.c | 2 +- drivers/net/ethernet/natsemi/macsonic.c | 2 +- drivers/net/ethernet/rdc/r6040.c | 2 +- drivers/net/ethernet/sis/sis900.c | 2 +- drivers/net/ethernet/smsc/smsc9420.c | 5 ++--- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- drivers/net/ethernet/toshiba/tc35815.c | 2 +- drivers/net/ethernet/via/via-rhine.c | 2 +- drivers/net/ifb.c | 2 +- drivers/net/tun.c | 2 +- drivers/net/usb/smsc75xx.c | 2 +- drivers/net/usb/smsc95xx.c | 2 +- drivers/net/veth.c | 4 ++-- drivers/net/virtio_net.c | 2 +- drivers/net/wan/hdlc_fr.c | 2 +- drivers/net/wan/hdlc_raw_eth.c | 2 +- net/ipv4/ip_gre.c | 2 +- net/l2tp/l2tp_eth.c | 2 +- 24 files changed, 26 insertions(+), 27 deletions(-) (limited to 'net/ipv4') diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 85e044567f68..c4834c23be35 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -87,7 +87,7 @@ static void __init macb_get_hwaddr(struct macb *bp) memcpy(bp->dev->dev_addr, addr, sizeof(addr)); } else { netdev_info(bp->dev, "invalid hw address, using random\n"); - random_ether_addr(bp->dev->dev_addr); + eth_hw_addr_random(bp->dev); } } diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c index c21e5ab8d1ef..78c55213eaf7 100644 --- a/drivers/net/ethernet/cirrus/ep93xx_eth.c +++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c @@ -859,7 +859,7 @@ static int ep93xx_eth_probe(struct platform_device *pdev) ep->mdc_divisor = 40; /* Max HCLK 100 MHz, min MDIO clk 2.5 MHz. */ if (is_zero_ether_addr(dev->dev_addr)) - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); err = register_netdev(dev); if (err) { diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 42383ab5227e..36499d5edd95 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1585,7 +1585,7 @@ dm9000_probe(struct platform_device *pdev) dev_warn(db->dev, "%s: Invalid ethernet MAC address. Please " "set using ifconfig\n", ndev->name); - random_ether_addr(ndev->dev_addr); + eth_hw_addr_random(ndev); mac_src = "random"; } diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c index 8536e376555a..b276469f74e9 100644 --- a/drivers/net/ethernet/dnet.c +++ b/drivers/net/ethernet/dnet.c @@ -895,7 +895,7 @@ static int __devinit dnet_probe(struct platform_device *pdev) if (!is_valid_ether_addr(dev->dev_addr)) { /* choose a random ethernet address */ - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); __dnet_set_hwaddr(bp); } diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 47f85c337cf7..16b07048274c 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1289,7 +1289,7 @@ static int ftgmac100_probe(struct platform_device *pdev) netdev_info(netdev, "irq %d, mapped at %p\n", priv->irq, priv->base); if (!is_valid_ether_addr(netdev->dev_addr)) { - random_ether_addr(netdev->dev_addr); + eth_hw_addr_random(netdev); netdev_info(netdev, "generated random MAC address %pM\n", netdev->dev_addr); } diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index bb336a0959c9..829b1092fd78 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -1133,7 +1133,7 @@ static int ftmac100_probe(struct platform_device *pdev) netdev_info(netdev, "irq %d, mapped at %p\n", priv->irq, priv->base); if (!is_valid_ether_addr(netdev->dev_addr)) { - random_ether_addr(netdev->dev_addr); + eth_hw_addr_random(netdev); netdev_info(netdev, "generated random MAC address %pM\n", netdev->dev_addr); } diff --git a/drivers/net/ethernet/mipsnet.c b/drivers/net/ethernet/mipsnet.c index dbc666a3d523..db5285befe2a 100644 --- a/drivers/net/ethernet/mipsnet.c +++ b/drivers/net/ethernet/mipsnet.c @@ -281,7 +281,7 @@ static int __devinit mipsnet_probe(struct platform_device *dev) * Lacking any better mechanism to allocate a MAC address we use a * random one ... */ - random_ether_addr(netdev->dev_addr); + eth_hw_addr_random(netdev); err = register_netdev(netdev); if (err) { diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c index f1b85561c650..e640e23460de 100644 --- a/drivers/net/ethernet/natsemi/macsonic.c +++ b/drivers/net/ethernet/natsemi/macsonic.c @@ -307,7 +307,7 @@ static void __devinit mac_onboard_sonic_ethernet_addr(struct net_device *dev) printk(KERN_WARNING "macsonic: MAC address in CAM entry 15 " "seems invalid, will use a random MAC\n"); - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); } static int __devinit mac_onboard_sonic_probe(struct net_device *dev) diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 76cab284876b..b96e1920e045 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -1151,7 +1151,7 @@ static int __devinit r6040_init_one(struct pci_dev *pdev, if (!(adrp[0] || adrp[1] || adrp[2])) { netdev_warn(dev, "MAC address not initialized, " "generating random\n"); - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); } /* Link new device into r6040_root_dev */ diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index f968898c86db..5ccf02e7e3ad 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -527,7 +527,7 @@ static int __devinit sis900_probe(struct pci_dev *pci_dev, ret = sis900_get_mac_addr(pci_dev, net_dev); if (!ret || !is_valid_ether_addr(net_dev->dev_addr)) { - random_ether_addr(net_dev->dev_addr); + eth_hw_addr_random(net_dev); printk(KERN_WARNING "%s: Unreadable or invalid MAC address," "using random generated one\n", dev_name); } diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c index ee1a1680b7a8..38386478532b 100644 --- a/drivers/net/ethernet/smsc/smsc9420.c +++ b/drivers/net/ethernet/smsc/smsc9420.c @@ -509,10 +509,9 @@ static void smsc9420_check_mac_address(struct net_device *dev) smsc_dbg(PROBE, "Mac Address is read from EEPROM"); } else { /* eeprom values are invalid, generate random MAC */ - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); smsc9420_set_mac_address(dev); - smsc_dbg(PROBE, - "MAC Address is set to random_ether_addr"); + smsc_dbg(PROBE, "MAC Address is set to random"); } } } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 166fc95e5baf..ab36dfcbd817 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -910,7 +910,7 @@ static void stmmac_check_ether_addr(struct stmmac_priv *priv) priv->dev->base_addr, priv->dev->dev_addr, 0); if (!is_valid_ether_addr(priv->dev->dev_addr)) - random_ether_addr(priv->dev->dev_addr); + eth_hw_addr_random(priv->dev); } pr_warning("%s: device MAC address %pM\n", priv->dev->name, priv->dev->dev_addr); diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index d117ad475c3e..651a70c55e6e 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -849,7 +849,7 @@ static int __devinit tc35815_init_one(struct pci_dev *pdev, /* Retrieve the ethernet address. */ if (tc35815_init_dev_addr(dev)) { dev_warn(&pdev->dev, "not valid ether addr\n"); - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); } rc = register_netdev(dev); diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index cb2ff28a0602..39b8cf3dafcd 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -983,7 +983,7 @@ static int __devinit rhine_init_one(struct pci_dev *pdev, if (!is_valid_ether_addr(dev->dev_addr)) { /* Report it and use a random ethernet address instead */ netdev_err(dev, "Invalid MAC address: %pM\n", dev->dev_addr); - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); netdev_info(dev, "Using random MAC address: %pM\n", dev->dev_addr); } diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index e05b645bbc32..344dceb1aaf9 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -184,7 +184,7 @@ static void ifb_setup(struct net_device *dev) dev->flags |= IFF_NOARP; dev->flags &= ~IFF_MULTICAST; dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); } static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 93c5d72711b0..2c5d34957c57 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -531,7 +531,7 @@ static void tun_net_init(struct net_device *dev) ether_setup(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); dev->tx_queue_len = TUN_READQ_SIZE; /* We prefer our own queue length */ break; diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 3b017bbd2a22..187d01ccb973 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -615,7 +615,7 @@ static void smsc75xx_init_mac_address(struct usbnet *dev) } /* no eeprom, or eeprom values are invalid. generate random MAC */ - random_ether_addr(dev->net->dev_addr); + eth_hw_addr_random(dev->net); netif_dbg(dev, ifup, dev->net, "MAC address set to random_ether_addr"); } diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index d45520e6dd48..5f19f84d3494 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -614,7 +614,7 @@ static void smsc95xx_init_mac_address(struct usbnet *dev) } /* no eeprom, or eeprom values are invalid. generate random MAC */ - random_ether_addr(dev->net->dev_addr); + eth_hw_addr_random(dev->net); netif_dbg(dev, ifup, dev->net, "MAC address set to random_ether_addr\n"); } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 49f4667e1fa3..b8a697f5cb69 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -346,7 +346,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, } if (tbp[IFLA_ADDRESS] == NULL) - random_ether_addr(peer->dev_addr); + eth_hw_addr_random(peer); err = register_netdevice(peer); put_net(net); @@ -368,7 +368,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, */ if (tb[IFLA_ADDRESS] == NULL) - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); if (tb[IFLA_IFNAME]) nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 83c503ed4ae8..019da012669f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1060,7 +1060,7 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_config_val_len(vdev, VIRTIO_NET_F_MAC, offsetof(struct virtio_net_config, mac), dev->dev_addr, dev->addr_len) < 0) - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); /* Set up our device-specific information */ vi = netdev_priv(dev); diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index eb2028187fbe..7c6cb4f31798 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1087,7 +1087,7 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) } if (type == ARPHRD_ETHER) - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); else { *(__be16*)dev->dev_addr = htons(dlci); dlci_to_q922(dev->broadcast, dlci); diff --git a/drivers/net/wan/hdlc_raw_eth.c b/drivers/net/wan/hdlc_raw_eth.c index 05c9b0b96239..3ab72b3082de 100644 --- a/drivers/net/wan/hdlc_raw_eth.c +++ b/drivers/net/wan/hdlc_raw_eth.c @@ -101,7 +101,7 @@ static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr) old_qlen = dev->tx_queue_len; ether_setup(dev); dev->tx_queue_len = old_qlen; - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); netif_dormant_off(dev); return 0; } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0286d78c589c..b59414a0c1ee 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1536,7 +1536,7 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nla return -EEXIST; if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); mtu = ipgre_tunnel_bind_dev(dev); if (!tb[IFLA_MTU]) diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index d2726a74597d..63fe5f353f04 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -64,7 +64,7 @@ static int l2tp_eth_dev_init(struct net_device *dev) struct l2tp_eth *priv = netdev_priv(dev); priv->dev = dev; - random_ether_addr(dev->dev_addr); + eth_hw_addr_random(dev); memset(&dev->broadcast[0], 0xff, 6); return 0; -- cgit v1.2.3 From 80703d265b7e8a801560d907b1bfe340e574dbca Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 15 Feb 2012 17:48:35 -0500 Subject: ipv4: Eliminate spurious argument to __ipv4_neigh_lookup 'tbl' is always arp_tbl, so specifying it is pointless. Signed-off-by: David S. Miller --- include/net/arp.h | 4 ++-- net/ipv4/route.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/arp.h b/include/net/arp.h index 0013dc87940b..4a1f3fb562eb 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -15,14 +15,14 @@ static inline u32 arp_hashfn(u32 key, const struct net_device *dev, u32 hash_rnd return val * hash_rnd; } -static inline struct neighbour *__ipv4_neigh_lookup(struct neigh_table *tbl, struct net_device *dev, u32 key) +static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key) { struct neigh_hash_table *nht; struct neighbour *n; u32 hash_val; rcu_read_lock_bh(); - nht = rcu_dereference_bh(tbl->nht); + nht = rcu_dereference_bh(arp_tbl.nht); hash_val = arp_hashfn(key, dev, nht->hash_rnd[0]) >> (32 - nht->hash_shift); for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); n != NULL; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4eeb8ce856e2..0489cedc1671 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1127,7 +1127,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const vo else if (rt->rt_gateway) pkey = (const __be32 *) &rt->rt_gateway; - n = __ipv4_neigh_lookup(&arp_tbl, dev, *(__force u32 *)pkey); + n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey); if (n) return n; return neigh_create(&arp_tbl, pkey, dev); -- cgit v1.2.3 From 3f518bf745cbd6007d8069100fb9cb09e960c872 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 21 Feb 2012 07:30:58 +0000 Subject: datagram: Add offset argument to __skb_recv_datagram This one is only considered for MSG_PEEK flag and the value pointed by it specifies where to start peeking bytes from. If the offset happens to point into the middle of the returned skb, the offset within this skb is put back to this very argument. Signed-off-by: Pavel Emelyanov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- net/core/datagram.c | 21 +++++++++++++-------- net/ipv4/udp.c | 4 ++-- net/ipv6/udp.c | 4 ++-- 4 files changed, 18 insertions(+), 13 deletions(-) (limited to 'net/ipv4') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2b7317ff297f..f3cf43de3c2a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2046,7 +2046,7 @@ static inline void skb_frag_add_head(struct sk_buff *skb, struct sk_buff *frag) for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) extern struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, - int *peeked, int *err); + int *peeked, int *off, int *err); extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); extern unsigned int datagram_poll(struct file *file, struct socket *sock, diff --git a/net/core/datagram.c b/net/core/datagram.c index 6f54d0a17f8e..d3cf12f62c8f 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -132,6 +132,8 @@ out_noerr: * __skb_recv_datagram - Receive a datagram skbuff * @sk: socket * @flags: MSG_ flags + * @off: an offset in bytes to peek skb from. Returns an offset + * within an skb where data actually starts * @peeked: returns non-zero if this packet has been seen before * @err: error code returned * @@ -158,7 +160,7 @@ out_noerr: * the standard around please. */ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, - int *peeked, int *err) + int *peeked, int *off, int *err) { struct sk_buff *skb; long timeo; @@ -183,19 +185,22 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, struct sk_buff_head *queue = &sk->sk_receive_queue; spin_lock_irqsave(&queue->lock, cpu_flags); - skb = skb_peek(queue); - if (skb) { + skb_queue_walk(queue, skb) { *peeked = skb->peeked; if (flags & MSG_PEEK) { + if (*off >= skb->len) { + *off -= skb->len; + continue; + } skb->peeked = 1; atomic_inc(&skb->users); } else __skb_unlink(skb, queue); - } - spin_unlock_irqrestore(&queue->lock, cpu_flags); - if (skb) + spin_unlock_irqrestore(&queue->lock, cpu_flags); return skb; + } + spin_unlock_irqrestore(&queue->lock, cpu_flags); /* User doesn't want to wait */ error = -EAGAIN; @@ -215,10 +220,10 @@ EXPORT_SYMBOL(__skb_recv_datagram); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err) { - int peeked; + int peeked, off = 0; return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, err); + &peeked, &off, err); } EXPORT_SYMBOL(skb_recv_datagram); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cd99f1a0f59f..7c41ab84e72e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1167,7 +1167,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; struct sk_buff *skb; unsigned int ulen, copied; - int peeked; + int peeked, off = 0; int err; int is_udplite = IS_UDPLITE(sk); bool slow; @@ -1183,7 +1183,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &err); + &peeked, &off, &err); if (!skb) goto out; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8aebf8f90436..37b0699e95e5 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -342,7 +342,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; unsigned int ulen, copied; - int peeked; + int peeked, off = 0; int err; int is_udplite = IS_UDPLITE(sk); int is_udp4; @@ -359,7 +359,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &err); + &peeked, &off, &err); if (!skb) goto out; -- cgit v1.2.3 From 80d326fab534a5380e8f6e509a0b9076655a9670 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 24 Feb 2012 14:30:15 +0000 Subject: netlink: add netlink_dump_control structure for netlink_dump_start() Davem considers that the argument list of this interface is getting out of control. This patch tries to address this issue following his proposal: struct netlink_dump_control c = { .dump = dump, .done = done, ... }; netlink_dump_start(..., &c); Suggested by David S. Miller. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- crypto/crypto_user.c | 10 +++++++--- drivers/infiniband/core/netlink.c | 10 +++++++--- include/linux/netlink.h | 10 +++++++--- net/core/rtnetlink.c | 9 +++++++-- net/ipv4/inet_diag.c | 18 ++++++++++++------ net/netfilter/ipset/ip_set_core.c | 10 +++++++--- net/netfilter/nf_conntrack_netlink.c | 18 ++++++++++++------ net/netfilter/nfnetlink_acct.c | 6 ++++-- net/netlink/af_netlink.c | 11 ++++------- net/netlink/genetlink.c | 9 +++++++-- net/unix/diag.c | 10 ++++++---- net/xfrm/xfrm_user.c | 9 +++++++-- 12 files changed, 87 insertions(+), 43 deletions(-) (limited to 'net/ipv4') diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 16f8693cc147..b6ac1387770c 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -389,9 +389,13 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) (nlh->nlmsg_flags & NLM_F_DUMP))) { if (link->dump == NULL) return -EINVAL; - - return netlink_dump_start(crypto_nlsk, skb, nlh, - link->dump, link->done, 0); + { + struct netlink_dump_control c = { + .dump = link->dump, + .done = link->done, + }; + return netlink_dump_start(crypto_nlsk, skb, nlh, &c); + } } err = nlmsg_parse(nlh, crypto_msg_min[type], attrs, CRYPTOCFGA_MAX, diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index d1c8196d15d7..396e29370304 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -147,9 +147,13 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (op < 0 || op >= client->nops || !client->cb_table[RDMA_NL_GET_OP(op)].dump) return -EINVAL; - return netlink_dump_start(nls, skb, nlh, - client->cb_table[op].dump, - NULL, 0); + + { + struct netlink_dump_control c = { + .dump = client->cb_table[op].dump, + }; + return netlink_dump_start(nls, skb, nlh, &c); + } } } diff --git a/include/linux/netlink.h b/include/linux/netlink.h index a390e9d54827..1f8c1a95f57c 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -248,11 +248,15 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) #define NLMSG_PUT(skb, pid, seq, type, len) \ NLMSG_NEW(skb, pid, seq, type, len, 0) +struct netlink_dump_control { + int (*dump)(struct sk_buff *skb, struct netlink_callback *); + int (*done)(struct netlink_callback*); + u16 min_dump_alloc; +}; + extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, const struct nlmsghdr *nlh, - int (*dump)(struct sk_buff *skb, struct netlink_callback*), - int (*done)(struct netlink_callback*), - u16 min_dump_alloc); + struct netlink_dump_control *control); #define NL_NONROOT_RECV 0x1 diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 65aebd450027..7aef62e53113 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1981,8 +1981,13 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) __rtnl_unlock(); rtnl = net->rtnl; - err = netlink_dump_start(rtnl, skb, nlh, dumpit, - NULL, min_dump_alloc); + { + struct netlink_dump_control c = { + .dump = dumpit, + .min_dump_alloc = min_dump_alloc, + }; + err = netlink_dump_start(rtnl, skb, nlh, &c); + } rtnl_lock(); return err; } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index fcf281819cd4..8d25a1c557eb 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -960,9 +960,12 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) inet_diag_bc_audit(nla_data(attr), nla_len(attr))) return -EINVAL; } - - return netlink_dump_start(sock_diag_nlsk, skb, nlh, - inet_diag_dump_compat, NULL, 0); + { + struct netlink_dump_control c = { + .dump = inet_diag_dump_compat, + }; + return netlink_dump_start(sock_diag_nlsk, skb, nlh, &c); + } } return inet_diag_get_exact_compat(skb, nlh); @@ -985,9 +988,12 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) inet_diag_bc_audit(nla_data(attr), nla_len(attr))) return -EINVAL; } - - return netlink_dump_start(sock_diag_nlsk, skb, h, - inet_diag_dump, NULL, 0); + { + struct netlink_dump_control c = { + .dump = inet_diag_dump, + }; + return netlink_dump_start(sock_diag_nlsk, skb, h, &c); + } } return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h)); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 32dbf0fa89db..e7f90e7082b4 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1162,9 +1162,13 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; - return netlink_dump_start(ctnl, skb, nlh, - ip_set_dump_start, - ip_set_dump_done, 0); + { + struct netlink_dump_control c = { + .dump = ip_set_dump_start, + .done = ip_set_dump_done, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } } /* Add, del and test */ diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9307b033c0c9..61f7feb7932b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -977,9 +977,13 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, u16 zone; int err; - if (nlh->nlmsg_flags & NLM_F_DUMP) - return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table, - ctnetlink_done, 0); + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = ctnetlink_dump_table, + .done = ctnetlink_done, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); if (err < 0) @@ -1850,9 +1854,11 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, int err; if (nlh->nlmsg_flags & NLM_F_DUMP) { - return netlink_dump_start(ctnl, skb, nlh, - ctnetlink_exp_dump_table, - ctnetlink_exp_done, 0); + struct netlink_dump_control c = { + .dump = ctnetlink_exp_dump_table, + .done = ctnetlink_exp_done, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); } err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 11ba013e47f6..3eb348bfc4fb 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -171,8 +171,10 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, char *acct_name; if (nlh->nlmsg_flags & NLM_F_DUMP) { - return netlink_dump_start(nfnl, skb, nlh, nfnl_acct_dump, - NULL, 0); + struct netlink_dump_control c = { + .dump = nfnl_acct_dump, + }; + return netlink_dump_start(nfnl, skb, nlh, &c); } if (!tb[NFACCT_NAME]) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4d751e3d4b4b..ab74845876d2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1736,10 +1736,7 @@ errout_skb: int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, const struct nlmsghdr *nlh, - int (*dump)(struct sk_buff *skb, - struct netlink_callback *), - int (*done)(struct netlink_callback *), - u16 min_dump_alloc) + struct netlink_dump_control *control) { struct netlink_callback *cb; struct sock *sk; @@ -1750,10 +1747,10 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, if (cb == NULL) return -ENOBUFS; - cb->dump = dump; - cb->done = done; + cb->dump = control->dump; + cb->done = control->done; cb->nlh = nlh; - cb->min_dump_alloc = min_dump_alloc; + cb->min_dump_alloc = control->min_dump_alloc; atomic_inc(&skb->users); cb->skb = skb; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index a1154717219e..9f40441d7a7d 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -563,8 +563,13 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EOPNOTSUPP; genl_unlock(); - err = netlink_dump_start(net->genl_sock, skb, nlh, - ops->dumpit, ops->done, 0); + { + struct netlink_dump_control c = { + .dump = ops->dumpit, + .done = ops->done, + }; + err = netlink_dump_start(net->genl_sock, skb, nlh, &c); + } genl_lock(); return err; } diff --git a/net/unix/diag.c b/net/unix/diag.c index 6b7697fd911b..4195555aea65 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -301,10 +301,12 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) if (nlmsg_len(h) < hdrlen) return -EINVAL; - if (h->nlmsg_flags & NLM_F_DUMP) - return netlink_dump_start(sock_diag_nlsk, skb, h, - unix_diag_dump, NULL, 0); - else + if (h->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = unix_diag_dump, + }; + return netlink_dump_start(sock_diag_nlsk, skb, h, &c); + } else return unix_diag_get_exact(skb, h, (struct unix_diag_req *)NLMSG_DATA(h)); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 66b84fbf2746..7128dde0fe1a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2299,8 +2299,13 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (link->dump == NULL) return -EINVAL; - return netlink_dump_start(net->xfrm.nlsk, skb, nlh, - link->dump, link->done, 0); + { + struct netlink_dump_control c = { + .dump = link->dump, + .done = link->done, + }; + return netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c); + } } err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX, -- cgit v1.2.3 From 544d5c7d9f4d1ec4f170bc5bcc522012cb7704bc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 5 Feb 2012 03:44:51 +0100 Subject: netfilter: ctnetlink: allow to set expectfn for expectations This patch allows you to set expectfn which is specifically used by the NAT side of most of the existing conntrack helpers. I have added a symbol map that uses a string as key to look up for the function that is attached to the expectation object. This is the best solution I came out with to solve this issue. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink_conntrack.h | 1 + include/net/netfilter/nf_conntrack_helper.h | 13 +++++++ net/ipv4/netfilter/nf_nat_core.c | 8 ++++ net/ipv4/netfilter/nf_nat_h323.c | 14 +++++++ net/ipv4/netfilter/nf_nat_sip.c | 7 ++++ net/netfilter/nf_conntrack_helper.c | 54 +++++++++++++++++++++++++++ net/netfilter/nf_conntrack_netlink.c | 19 +++++++++- 7 files changed, 115 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index a2f1f483ecc9..e58e4b93c108 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -175,6 +175,7 @@ enum ctattr_expect { CTA_EXPECT_FLAGS, CTA_EXPECT_CLASS, CTA_EXPECT_NAT, + CTA_EXPECT_FN, __CTA_EXPECT_MAX }; #define CTA_EXPECT_MAX (__CTA_EXPECT_MAX - 1) diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index f1c1311adc2c..5767dc242dee 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -69,4 +69,17 @@ extern int nf_conntrack_broadcast_help(struct sk_buff *skb, enum ip_conntrack_info ctinfo, unsigned int timeout); +struct nf_ct_helper_expectfn { + struct list_head head; + const char *name; + void (*expectfn)(struct nf_conn *ct, struct nf_conntrack_expect *exp); +}; + +void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n); +void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n); +struct nf_ct_helper_expectfn * +nf_ct_helper_expectfn_find_by_name(const char *name); +struct nf_ct_helper_expectfn * +nf_ct_helper_expectfn_find_by_symbol(const void *symbol); + #endif /*_NF_CONNTRACK_HELPER_H*/ diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index a708933dc230..abb52adf5acd 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -686,6 +686,11 @@ static struct pernet_operations nf_nat_net_ops = { .exit = nf_nat_net_exit, }; +static struct nf_ct_helper_expectfn follow_master_nat = { + .name = "nat-follow-master", + .expectfn = nf_nat_follow_master, +}; + static int __init nf_nat_init(void) { size_t i; @@ -717,6 +722,8 @@ static int __init nf_nat_init(void) l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); + nf_ct_helper_expectfn_register(&follow_master_nat); + BUG_ON(nf_nat_seq_adjust_hook != NULL); RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); @@ -736,6 +743,7 @@ static void __exit nf_nat_cleanup(void) unregister_pernet_subsys(&nf_nat_net_ops); nf_ct_l3proto_put(l3proto); nf_ct_extend_unregister(&nat_extend); + nf_ct_helper_expectfn_unregister(&follow_master_nat); RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL); RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL); RCU_INIT_POINTER(nf_ct_nat_offset, NULL); diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index dc1dd912baf4..82536701e3a3 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -568,6 +568,16 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, return 0; } +static struct nf_ct_helper_expectfn q931_nat = { + .name = "Q.931", + .expectfn = ip_nat_q931_expect, +}; + +static struct nf_ct_helper_expectfn callforwarding_nat = { + .name = "callforwarding", + .expectfn = ip_nat_callforwarding_expect, +}; + /****************************************************************************/ static int __init init(void) { @@ -590,6 +600,8 @@ static int __init init(void) RCU_INIT_POINTER(nat_h245_hook, nat_h245); RCU_INIT_POINTER(nat_callforwarding_hook, nat_callforwarding); RCU_INIT_POINTER(nat_q931_hook, nat_q931); + nf_ct_helper_expectfn_register(&q931_nat); + nf_ct_helper_expectfn_register(&callforwarding_nat); return 0; } @@ -605,6 +617,8 @@ static void __exit fini(void) RCU_INIT_POINTER(nat_h245_hook, NULL); RCU_INIT_POINTER(nat_callforwarding_hook, NULL); RCU_INIT_POINTER(nat_q931_hook, NULL); + nf_ct_helper_expectfn_unregister(&q931_nat); + nf_ct_helper_expectfn_unregister(&callforwarding_nat); synchronize_rcu(); } diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index d0319f96269f..57932c43960e 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -526,6 +526,11 @@ err1: return NF_DROP; } +static struct nf_ct_helper_expectfn sip_nat = { + .name = "sip", + .expectfn = ip_nat_sip_expected, +}; + static void __exit nf_nat_sip_fini(void) { RCU_INIT_POINTER(nf_nat_sip_hook, NULL); @@ -535,6 +540,7 @@ static void __exit nf_nat_sip_fini(void) RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL); RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL); RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL); + nf_ct_helper_expectfn_unregister(&sip_nat); synchronize_rcu(); } @@ -554,6 +560,7 @@ static int __init nf_nat_sip_init(void) RCU_INIT_POINTER(nf_nat_sdp_port_hook, ip_nat_sdp_port); RCU_INIT_POINTER(nf_nat_sdp_session_hook, ip_nat_sdp_session); RCU_INIT_POINTER(nf_nat_sdp_media_hook, ip_nat_sdp_media); + nf_ct_helper_expectfn_register(&sip_nat); return 0; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index bbe23baa19b6..436b7cb79ba4 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -181,6 +181,60 @@ void nf_ct_helper_destroy(struct nf_conn *ct) } } +static LIST_HEAD(nf_ct_helper_expectfn_list); + +void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n) +{ + spin_lock_bh(&nf_conntrack_lock); + list_add_rcu(&n->head, &nf_ct_helper_expectfn_list); + spin_unlock_bh(&nf_conntrack_lock); +} +EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_register); + +void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n) +{ + spin_lock_bh(&nf_conntrack_lock); + list_del_rcu(&n->head); + spin_unlock_bh(&nf_conntrack_lock); +} +EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister); + +struct nf_ct_helper_expectfn * +nf_ct_helper_expectfn_find_by_name(const char *name) +{ + struct nf_ct_helper_expectfn *cur; + bool found = false; + + rcu_read_lock(); + list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) { + if (!strcmp(cur->name, name)) { + found = true; + break; + } + } + rcu_read_unlock(); + return found ? cur : NULL; +} +EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_name); + +struct nf_ct_helper_expectfn * +nf_ct_helper_expectfn_find_by_symbol(const void *symbol) +{ + struct nf_ct_helper_expectfn *cur; + bool found = false; + + rcu_read_lock(); + list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) { + if (cur->expectfn == symbol) { + found = true; + break; + } + } + rcu_read_unlock(); + return found ? cur : NULL; +} +EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol); + int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { unsigned int h = helper_hash(&me->tuple); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 845c8ca28563..b8827e8a11fe 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1679,6 +1679,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, struct nlattr *nest_parms; struct nf_conntrack_tuple nat_tuple = {}; #endif + struct nf_ct_helper_expectfn *expfn; + if (timeout < 0) timeout = 0; @@ -1722,6 +1724,9 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, if (helper) NLA_PUT_STRING(skb, CTA_EXPECT_HELP_NAME, helper->name); } + expfn = nf_ct_helper_expectfn_find_by_symbol(exp->expectfn); + if (expfn != NULL) + NLA_PUT_STRING(skb, CTA_EXPECT_FN, expfn->name); return 0; @@ -1881,6 +1886,7 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { [CTA_EXPECT_FLAGS] = { .type = NLA_U32 }, [CTA_EXPECT_CLASS] = { .type = NLA_U32 }, [CTA_EXPECT_NAT] = { .type = NLA_NESTED }, + [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING }, }; static int @@ -2182,9 +2188,20 @@ ctnetlink_create_expect(struct net *net, u16 zone, } else exp->flags = 0; } + if (cda[CTA_EXPECT_FN]) { + const char *name = nla_data(cda[CTA_EXPECT_FN]); + struct nf_ct_helper_expectfn *expfn; + + expfn = nf_ct_helper_expectfn_find_by_name(name); + if (expfn == NULL) { + err = -EINVAL; + goto err_out; + } + exp->expectfn = expfn->expectfn; + } else + exp->expectfn = NULL; exp->class = class; - exp->expectfn = NULL; exp->master = ct; exp->helper = helper; memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple)); -- cgit v1.2.3 From 6939c33a757bd006c5e0b8b5fd429fc587a4d0f4 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 10 Feb 2012 23:10:52 +0100 Subject: netfilter: merge ipt_LOG and ip6_LOG into xt_LOG ipt_LOG and ip6_LOG have a lot of common code, merge them to reduce duplicate code. Signed-off-by: Richard Weinberger Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_LOG.h | 19 + include/linux/netfilter_ipv4/ipt_LOG.h | 2 + include/linux/netfilter_ipv6/ip6t_LOG.h | 2 + net/ipv4/netfilter/Kconfig | 9 - net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_LOG.c | 516 ------------------ net/ipv6/netfilter/Kconfig | 9 - net/ipv6/netfilter/Makefile | 1 - net/ipv6/netfilter/ip6t_LOG.c | 527 ------------------ net/netfilter/Kconfig | 9 + net/netfilter/Makefile | 1 + net/netfilter/xt_LOG.c | 921 ++++++++++++++++++++++++++++++++ 13 files changed, 955 insertions(+), 1063 deletions(-) create mode 100644 include/linux/netfilter/xt_LOG.h delete mode 100644 net/ipv4/netfilter/ipt_LOG.c delete mode 100644 net/ipv6/netfilter/ip6t_LOG.c create mode 100644 net/netfilter/xt_LOG.c (limited to 'net/ipv4') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index e144f54185c0..aaa72aaa21de 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -22,6 +22,7 @@ header-y += xt_CT.h header-y += xt_DSCP.h header-y += xt_IDLETIMER.h header-y += xt_LED.h +header-y += xt_LOG.h header-y += xt_MARK.h header-y += xt_nfacct.h header-y += xt_NFLOG.h diff --git a/include/linux/netfilter/xt_LOG.h b/include/linux/netfilter/xt_LOG.h new file mode 100644 index 000000000000..cac079095305 --- /dev/null +++ b/include/linux/netfilter/xt_LOG.h @@ -0,0 +1,19 @@ +#ifndef _XT_LOG_H +#define _XT_LOG_H + +/* make sure not to change this without changing nf_log.h:NF_LOG_* (!) */ +#define XT_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ +#define XT_LOG_TCPOPT 0x02 /* Log TCP options */ +#define XT_LOG_IPOPT 0x04 /* Log IP options */ +#define XT_LOG_UID 0x08 /* Log UID owning local socket */ +#define XT_LOG_NFLOG 0x10 /* Unsupported, don't reuse */ +#define XT_LOG_MACDECODE 0x20 /* Decode MAC header */ +#define XT_LOG_MASK 0x2f + +struct xt_log_info { + unsigned char level; + unsigned char logflags; + char prefix[30]; +}; + +#endif /* _XT_LOG_H */ diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/linux/netfilter_ipv4/ipt_LOG.h index dcdbadf9fd4a..5d8152077d71 100644 --- a/include/linux/netfilter_ipv4/ipt_LOG.h +++ b/include/linux/netfilter_ipv4/ipt_LOG.h @@ -1,6 +1,8 @@ #ifndef _IPT_LOG_H #define _IPT_LOG_H +#warning "Please update iptables, this file will be removed soon!" + /* make sure not to change this without changing netfilter.h:NF_LOG_* (!) */ #define IPT_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ #define IPT_LOG_TCPOPT 0x02 /* Log TCP options */ diff --git a/include/linux/netfilter_ipv6/ip6t_LOG.h b/include/linux/netfilter_ipv6/ip6t_LOG.h index 9dd5579e02ec..3dd0bc4e0735 100644 --- a/include/linux/netfilter_ipv6/ip6t_LOG.h +++ b/include/linux/netfilter_ipv6/ip6t_LOG.h @@ -1,6 +1,8 @@ #ifndef _IP6T_LOG_H #define _IP6T_LOG_H +#warning "Please update iptables, this file will be removed soon!" + /* make sure not to change this without changing netfilter.h:NF_LOG_* (!) */ #define IP6T_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ #define IP6T_LOG_TCPOPT 0x02 /* Log TCP options */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 74dfc9e5211f..fcc543cd987a 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -123,15 +123,6 @@ config IP_NF_TARGET_REJECT To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_LOG - tristate "LOG target support" - default m if NETFILTER_ADVANCED=n - help - This option adds a `LOG' target, which allows you to create rules in - any iptables table which records the packet header to the syslog. - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_TARGET_ULOG tristate "ULOG target support" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 213a462b739b..240b68469a7a 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -54,7 +54,6 @@ obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o # targets obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o -obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c deleted file mode 100644 index d76d6c9ed946..000000000000 --- a/net/ipv4/netfilter/ipt_LOG.c +++ /dev/null @@ -1,516 +0,0 @@ -/* - * This is a module which is used for logging packets. - */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Netfilter Core Team "); -MODULE_DESCRIPTION("Xtables: IPv4 packet logging to syslog"); - -/* One level of recursion won't kill us */ -static void dump_packet(struct sbuff *m, - const struct nf_loginfo *info, - const struct sk_buff *skb, - unsigned int iphoff) -{ - struct iphdr _iph; - const struct iphdr *ih; - unsigned int logflags; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - else - logflags = NF_LOG_MASK; - - ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); - if (ih == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Important fields: - * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ - /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ - sb_add(m, "SRC=%pI4 DST=%pI4 ", - &ih->saddr, &ih->daddr); - - /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ - sb_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", - ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK, - ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id)); - - /* Max length: 6 "CE DF MF " */ - if (ntohs(ih->frag_off) & IP_CE) - sb_add(m, "CE "); - if (ntohs(ih->frag_off) & IP_DF) - sb_add(m, "DF "); - if (ntohs(ih->frag_off) & IP_MF) - sb_add(m, "MF "); - - /* Max length: 11 "FRAG:65535 " */ - if (ntohs(ih->frag_off) & IP_OFFSET) - sb_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); - - if ((logflags & IPT_LOG_IPOPT) && - ih->ihl * 4 > sizeof(struct iphdr)) { - const unsigned char *op; - unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; - unsigned int i, optsize; - - optsize = ih->ihl * 4 - sizeof(struct iphdr); - op = skb_header_pointer(skb, iphoff+sizeof(_iph), - optsize, _opt); - if (op == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Max length: 127 "OPT (" 15*4*2chars ") " */ - sb_add(m, "OPT ("); - for (i = 0; i < optsize; i++) - sb_add(m, "%02X", op[i]); - sb_add(m, ") "); - } - - switch (ih->protocol) { - case IPPROTO_TCP: { - struct tcphdr _tcph; - const struct tcphdr *th; - - /* Max length: 10 "PROTO=TCP " */ - sb_add(m, "PROTO=TCP "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - th = skb_header_pointer(skb, iphoff + ih->ihl * 4, - sizeof(_tcph), &_tcph); - if (th == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Max length: 20 "SPT=65535 DPT=65535 " */ - sb_add(m, "SPT=%u DPT=%u ", - ntohs(th->source), ntohs(th->dest)); - /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ - if (logflags & IPT_LOG_TCPSEQ) - sb_add(m, "SEQ=%u ACK=%u ", - ntohl(th->seq), ntohl(th->ack_seq)); - /* Max length: 13 "WINDOW=65535 " */ - sb_add(m, "WINDOW=%u ", ntohs(th->window)); - /* Max length: 9 "RES=0x3F " */ - sb_add(m, "RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22)); - /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ - if (th->cwr) - sb_add(m, "CWR "); - if (th->ece) - sb_add(m, "ECE "); - if (th->urg) - sb_add(m, "URG "); - if (th->ack) - sb_add(m, "ACK "); - if (th->psh) - sb_add(m, "PSH "); - if (th->rst) - sb_add(m, "RST "); - if (th->syn) - sb_add(m, "SYN "); - if (th->fin) - sb_add(m, "FIN "); - /* Max length: 11 "URGP=65535 " */ - sb_add(m, "URGP=%u ", ntohs(th->urg_ptr)); - - if ((logflags & IPT_LOG_TCPOPT) && - th->doff * 4 > sizeof(struct tcphdr)) { - unsigned char _opt[4 * 15 - sizeof(struct tcphdr)]; - const unsigned char *op; - unsigned int i, optsize; - - optsize = th->doff * 4 - sizeof(struct tcphdr); - op = skb_header_pointer(skb, - iphoff+ih->ihl*4+sizeof(_tcph), - optsize, _opt); - if (op == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Max length: 127 "OPT (" 15*4*2chars ") " */ - sb_add(m, "OPT ("); - for (i = 0; i < optsize; i++) - sb_add(m, "%02X", op[i]); - sb_add(m, ") "); - } - break; - } - case IPPROTO_UDP: - case IPPROTO_UDPLITE: { - struct udphdr _udph; - const struct udphdr *uh; - - if (ih->protocol == IPPROTO_UDP) - /* Max length: 10 "PROTO=UDP " */ - sb_add(m, "PROTO=UDP " ); - else /* Max length: 14 "PROTO=UDPLITE " */ - sb_add(m, "PROTO=UDPLITE "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - uh = skb_header_pointer(skb, iphoff+ih->ihl*4, - sizeof(_udph), &_udph); - if (uh == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Max length: 20 "SPT=65535 DPT=65535 " */ - sb_add(m, "SPT=%u DPT=%u LEN=%u ", - ntohs(uh->source), ntohs(uh->dest), - ntohs(uh->len)); - break; - } - case IPPROTO_ICMP: { - struct icmphdr _icmph; - const struct icmphdr *ich; - static const size_t required_len[NR_ICMP_TYPES+1] - = { [ICMP_ECHOREPLY] = 4, - [ICMP_DEST_UNREACH] - = 8 + sizeof(struct iphdr), - [ICMP_SOURCE_QUENCH] - = 8 + sizeof(struct iphdr), - [ICMP_REDIRECT] - = 8 + sizeof(struct iphdr), - [ICMP_ECHO] = 4, - [ICMP_TIME_EXCEEDED] - = 8 + sizeof(struct iphdr), - [ICMP_PARAMETERPROB] - = 8 + sizeof(struct iphdr), - [ICMP_TIMESTAMP] = 20, - [ICMP_TIMESTAMPREPLY] = 20, - [ICMP_ADDRESS] = 12, - [ICMP_ADDRESSREPLY] = 12 }; - - /* Max length: 11 "PROTO=ICMP " */ - sb_add(m, "PROTO=ICMP "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - ich = skb_header_pointer(skb, iphoff + ih->ihl * 4, - sizeof(_icmph), &_icmph); - if (ich == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Max length: 18 "TYPE=255 CODE=255 " */ - sb_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code); - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - if (ich->type <= NR_ICMP_TYPES && - required_len[ich->type] && - skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - switch (ich->type) { - case ICMP_ECHOREPLY: - case ICMP_ECHO: - /* Max length: 19 "ID=65535 SEQ=65535 " */ - sb_add(m, "ID=%u SEQ=%u ", - ntohs(ich->un.echo.id), - ntohs(ich->un.echo.sequence)); - break; - - case ICMP_PARAMETERPROB: - /* Max length: 14 "PARAMETER=255 " */ - sb_add(m, "PARAMETER=%u ", - ntohl(ich->un.gateway) >> 24); - break; - case ICMP_REDIRECT: - /* Max length: 24 "GATEWAY=255.255.255.255 " */ - sb_add(m, "GATEWAY=%pI4 ", &ich->un.gateway); - /* Fall through */ - case ICMP_DEST_UNREACH: - case ICMP_SOURCE_QUENCH: - case ICMP_TIME_EXCEEDED: - /* Max length: 3+maxlen */ - if (!iphoff) { /* Only recurse once. */ - sb_add(m, "["); - dump_packet(m, info, skb, - iphoff + ih->ihl*4+sizeof(_icmph)); - sb_add(m, "] "); - } - - /* Max length: 10 "MTU=65535 " */ - if (ich->type == ICMP_DEST_UNREACH && - ich->code == ICMP_FRAG_NEEDED) - sb_add(m, "MTU=%u ", ntohs(ich->un.frag.mtu)); - } - break; - } - /* Max Length */ - case IPPROTO_AH: { - struct ip_auth_hdr _ahdr; - const struct ip_auth_hdr *ah; - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 9 "PROTO=AH " */ - sb_add(m, "PROTO=AH "); - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - ah = skb_header_pointer(skb, iphoff+ih->ihl*4, - sizeof(_ahdr), &_ahdr); - if (ah == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Length: 15 "SPI=0xF1234567 " */ - sb_add(m, "SPI=0x%x ", ntohl(ah->spi)); - break; - } - case IPPROTO_ESP: { - struct ip_esp_hdr _esph; - const struct ip_esp_hdr *eh; - - /* Max length: 10 "PROTO=ESP " */ - sb_add(m, "PROTO=ESP "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - eh = skb_header_pointer(skb, iphoff+ih->ihl*4, - sizeof(_esph), &_esph); - if (eh == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Length: 15 "SPI=0xF1234567 " */ - sb_add(m, "SPI=0x%x ", ntohl(eh->spi)); - break; - } - /* Max length: 10 "PROTO 255 " */ - default: - sb_add(m, "PROTO=%u ", ih->protocol); - } - - /* Max length: 15 "UID=4294967295 " */ - if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) { - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) - sb_add(m, "UID=%u GID=%u ", - skb->sk->sk_socket->file->f_cred->fsuid, - skb->sk->sk_socket->file->f_cred->fsgid); - read_unlock_bh(&skb->sk->sk_callback_lock); - } - - /* Max length: 16 "MARK=0xFFFFFFFF " */ - if (!iphoff && skb->mark) - sb_add(m, "MARK=0x%x ", skb->mark); - - /* Proto Max log string length */ - /* IP: 40+46+6+11+127 = 230 */ - /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ - /* UDP: 10+max(25,20) = 35 */ - /* UDPLITE: 14+max(25,20) = 39 */ - /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ - /* ESP: 10+max(25)+15 = 50 */ - /* AH: 9+max(25)+15 = 49 */ - /* unknown: 10 */ - - /* (ICMP allows recursion one level deep) */ - /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ - /* maxlen = 230+ 91 + 230 + 252 = 803 */ -} - -static void dump_mac_header(struct sbuff *m, - const struct nf_loginfo *info, - const struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - unsigned int logflags = 0; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - - if (!(logflags & IPT_LOG_MACDECODE)) - goto fallback; - - switch (dev->type) { - case ARPHRD_ETHER: - sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ", - eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, - ntohs(eth_hdr(skb)->h_proto)); - return; - default: - break; - } - -fallback: - sb_add(m, "MAC="); - if (dev->hard_header_len && - skb->mac_header != skb->network_header) { - const unsigned char *p = skb_mac_header(skb); - unsigned int i; - - sb_add(m, "%02x", *p++); - for (i = 1; i < dev->hard_header_len; i++, p++) - sb_add(m, ":%02x", *p); - } - sb_add(m, " "); -} - -static struct nf_loginfo default_loginfo = { - .type = NF_LOG_TYPE_LOG, - .u = { - .log = { - .level = 5, - .logflags = NF_LOG_MASK, - }, - }, -}; - -static void -ipt_log_packet(u_int8_t pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) -{ - struct sbuff *m = sb_open(); - - if (!loginfo) - loginfo = &default_loginfo; - - sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, - prefix, - in ? in->name : "", - out ? out->name : ""); -#ifdef CONFIG_BRIDGE_NETFILTER - if (skb->nf_bridge) { - const struct net_device *physindev; - const struct net_device *physoutdev; - - physindev = skb->nf_bridge->physindev; - if (physindev && in != physindev) - sb_add(m, "PHYSIN=%s ", physindev->name); - physoutdev = skb->nf_bridge->physoutdev; - if (physoutdev && out != physoutdev) - sb_add(m, "PHYSOUT=%s ", physoutdev->name); - } -#endif - - if (in != NULL) - dump_mac_header(m, loginfo, skb); - - dump_packet(m, loginfo, skb, 0); - - sb_close(m); -} - -static unsigned int -log_tg(struct sk_buff *skb, const struct xt_action_param *par) -{ - const struct ipt_log_info *loginfo = par->targinfo; - struct nf_loginfo li; - - li.type = NF_LOG_TYPE_LOG; - li.u.log.level = loginfo->level; - li.u.log.logflags = loginfo->logflags; - - ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, par->out, &li, - loginfo->prefix); - return XT_CONTINUE; -} - -static int log_tg_check(const struct xt_tgchk_param *par) -{ - const struct ipt_log_info *loginfo = par->targinfo; - - if (loginfo->level >= 8) { - pr_debug("level %u >= 8\n", loginfo->level); - return -EINVAL; - } - if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { - pr_debug("prefix is not null-terminated\n"); - return -EINVAL; - } - return 0; -} - -static struct xt_target log_tg_reg __read_mostly = { - .name = "LOG", - .family = NFPROTO_IPV4, - .target = log_tg, - .targetsize = sizeof(struct ipt_log_info), - .checkentry = log_tg_check, - .me = THIS_MODULE, -}; - -static struct nf_logger ipt_log_logger __read_mostly = { - .name = "ipt_LOG", - .logfn = &ipt_log_packet, - .me = THIS_MODULE, -}; - -static int __init log_tg_init(void) -{ - int ret; - - ret = xt_register_target(&log_tg_reg); - if (ret < 0) - return ret; - nf_log_register(NFPROTO_IPV4, &ipt_log_logger); - return 0; -} - -static void __exit log_tg_exit(void) -{ - nf_log_unregister(&ipt_log_logger); - xt_unregister_target(&log_tg_reg); -} - -module_init(log_tg_init); -module_exit(log_tg_exit); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 9a68fb5b9e77..d33cddd16fbb 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -154,15 +154,6 @@ config IP6_NF_TARGET_HL (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_TARGET_HL. -config IP6_NF_TARGET_LOG - tristate "LOG target support" - default m if NETFILTER_ADVANCED=n - help - This option adds a `LOG' target, which allows you to create rules in - any iptables table which records the packet header to the syslog. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_FILTER tristate "Packet filtering" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 2eaed96db02c..d4dfd0a21097 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -31,5 +31,4 @@ obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets -obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c deleted file mode 100644 index e6af8d72f26b..000000000000 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ /dev/null @@ -1,527 +0,0 @@ -/* - * This is a module which is used for logging packets. - */ - -/* (C) 2001 Jan Rekorajski - * (C) 2002-2004 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -MODULE_AUTHOR("Jan Rekorajski "); -MODULE_DESCRIPTION("Xtables: IPv6 packet logging to syslog"); -MODULE_LICENSE("GPL"); - -struct in_device; -#include -#include - -/* One level of recursion won't kill us */ -static void dump_packet(struct sbuff *m, - const struct nf_loginfo *info, - const struct sk_buff *skb, unsigned int ip6hoff, - int recurse) -{ - u_int8_t currenthdr; - int fragment; - struct ipv6hdr _ip6h; - const struct ipv6hdr *ih; - unsigned int ptr; - unsigned int hdrlen = 0; - unsigned int logflags; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - else - logflags = NF_LOG_MASK; - - ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); - if (ih == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ - sb_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); - - /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ - sb_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", - ntohs(ih->payload_len) + sizeof(struct ipv6hdr), - (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, - ih->hop_limit, - (ntohl(*(__be32 *)ih) & 0x000fffff)); - - fragment = 0; - ptr = ip6hoff + sizeof(struct ipv6hdr); - currenthdr = ih->nexthdr; - while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) { - struct ipv6_opt_hdr _hdr; - const struct ipv6_opt_hdr *hp; - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - if (hp == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Max length: 48 "OPT (...) " */ - if (logflags & IP6T_LOG_IPOPT) - sb_add(m, "OPT ( "); - - switch (currenthdr) { - case IPPROTO_FRAGMENT: { - struct frag_hdr _fhdr; - const struct frag_hdr *fh; - - sb_add(m, "FRAG:"); - fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), - &_fhdr); - if (fh == NULL) { - sb_add(m, "TRUNCATED "); - return; - } - - /* Max length: 6 "65535 " */ - sb_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8); - - /* Max length: 11 "INCOMPLETE " */ - if (fh->frag_off & htons(0x0001)) - sb_add(m, "INCOMPLETE "); - - sb_add(m, "ID:%08x ", ntohl(fh->identification)); - - if (ntohs(fh->frag_off) & 0xFFF8) - fragment = 1; - - hdrlen = 8; - - break; - } - case IPPROTO_DSTOPTS: - case IPPROTO_ROUTING: - case IPPROTO_HOPOPTS: - if (fragment) { - if (logflags & IP6T_LOG_IPOPT) - sb_add(m, ")"); - return; - } - hdrlen = ipv6_optlen(hp); - break; - /* Max Length */ - case IPPROTO_AH: - if (logflags & IP6T_LOG_IPOPT) { - struct ip_auth_hdr _ahdr; - const struct ip_auth_hdr *ah; - - /* Max length: 3 "AH " */ - sb_add(m, "AH "); - - if (fragment) { - sb_add(m, ")"); - return; - } - - ah = skb_header_pointer(skb, ptr, sizeof(_ahdr), - &_ahdr); - if (ah == NULL) { - /* - * Max length: 26 "INCOMPLETE [65535 - * bytes] )" - */ - sb_add(m, "INCOMPLETE [%u bytes] )", - skb->len - ptr); - return; - } - - /* Length: 15 "SPI=0xF1234567 */ - sb_add(m, "SPI=0x%x ", ntohl(ah->spi)); - - } - - hdrlen = (hp->hdrlen+2)<<2; - break; - case IPPROTO_ESP: - if (logflags & IP6T_LOG_IPOPT) { - struct ip_esp_hdr _esph; - const struct ip_esp_hdr *eh; - - /* Max length: 4 "ESP " */ - sb_add(m, "ESP "); - - if (fragment) { - sb_add(m, ")"); - return; - } - - /* - * Max length: 26 "INCOMPLETE [65535 bytes] )" - */ - eh = skb_header_pointer(skb, ptr, sizeof(_esph), - &_esph); - if (eh == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] )", - skb->len - ptr); - return; - } - - /* Length: 16 "SPI=0xF1234567 )" */ - sb_add(m, "SPI=0x%x )", ntohl(eh->spi) ); - - } - return; - default: - /* Max length: 20 "Unknown Ext Hdr 255" */ - sb_add(m, "Unknown Ext Hdr %u", currenthdr); - return; - } - if (logflags & IP6T_LOG_IPOPT) - sb_add(m, ") "); - - currenthdr = hp->nexthdr; - ptr += hdrlen; - } - - switch (currenthdr) { - case IPPROTO_TCP: { - struct tcphdr _tcph; - const struct tcphdr *th; - - /* Max length: 10 "PROTO=TCP " */ - sb_add(m, "PROTO=TCP "); - - if (fragment) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - th = skb_header_pointer(skb, ptr, sizeof(_tcph), &_tcph); - if (th == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr); - return; - } - - /* Max length: 20 "SPT=65535 DPT=65535 " */ - sb_add(m, "SPT=%u DPT=%u ", - ntohs(th->source), ntohs(th->dest)); - /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ - if (logflags & IP6T_LOG_TCPSEQ) - sb_add(m, "SEQ=%u ACK=%u ", - ntohl(th->seq), ntohl(th->ack_seq)); - /* Max length: 13 "WINDOW=65535 " */ - sb_add(m, "WINDOW=%u ", ntohs(th->window)); - /* Max length: 9 "RES=0x3C " */ - sb_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22)); - /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ - if (th->cwr) - sb_add(m, "CWR "); - if (th->ece) - sb_add(m, "ECE "); - if (th->urg) - sb_add(m, "URG "); - if (th->ack) - sb_add(m, "ACK "); - if (th->psh) - sb_add(m, "PSH "); - if (th->rst) - sb_add(m, "RST "); - if (th->syn) - sb_add(m, "SYN "); - if (th->fin) - sb_add(m, "FIN "); - /* Max length: 11 "URGP=65535 " */ - sb_add(m, "URGP=%u ", ntohs(th->urg_ptr)); - - if ((logflags & IP6T_LOG_TCPOPT) && - th->doff * 4 > sizeof(struct tcphdr)) { - u_int8_t _opt[60 - sizeof(struct tcphdr)]; - const u_int8_t *op; - unsigned int i; - unsigned int optsize = th->doff * 4 - - sizeof(struct tcphdr); - - op = skb_header_pointer(skb, - ptr + sizeof(struct tcphdr), - optsize, _opt); - if (op == NULL) { - sb_add(m, "OPT (TRUNCATED)"); - return; - } - - /* Max length: 127 "OPT (" 15*4*2chars ") " */ - sb_add(m, "OPT ("); - for (i =0; i < optsize; i++) - sb_add(m, "%02X", op[i]); - sb_add(m, ") "); - } - break; - } - case IPPROTO_UDP: - case IPPROTO_UDPLITE: { - struct udphdr _udph; - const struct udphdr *uh; - - if (currenthdr == IPPROTO_UDP) - /* Max length: 10 "PROTO=UDP " */ - sb_add(m, "PROTO=UDP " ); - else /* Max length: 14 "PROTO=UDPLITE " */ - sb_add(m, "PROTO=UDPLITE "); - - if (fragment) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - uh = skb_header_pointer(skb, ptr, sizeof(_udph), &_udph); - if (uh == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr); - return; - } - - /* Max length: 20 "SPT=65535 DPT=65535 " */ - sb_add(m, "SPT=%u DPT=%u LEN=%u ", - ntohs(uh->source), ntohs(uh->dest), - ntohs(uh->len)); - break; - } - case IPPROTO_ICMPV6: { - struct icmp6hdr _icmp6h; - const struct icmp6hdr *ic; - - /* Max length: 13 "PROTO=ICMPv6 " */ - sb_add(m, "PROTO=ICMPv6 "); - - if (fragment) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h); - if (ic == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr); - return; - } - - /* Max length: 18 "TYPE=255 CODE=255 " */ - sb_add(m, "TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code); - - switch (ic->icmp6_type) { - case ICMPV6_ECHO_REQUEST: - case ICMPV6_ECHO_REPLY: - /* Max length: 19 "ID=65535 SEQ=65535 " */ - sb_add(m, "ID=%u SEQ=%u ", - ntohs(ic->icmp6_identifier), - ntohs(ic->icmp6_sequence)); - break; - case ICMPV6_MGM_QUERY: - case ICMPV6_MGM_REPORT: - case ICMPV6_MGM_REDUCTION: - break; - - case ICMPV6_PARAMPROB: - /* Max length: 17 "POINTER=ffffffff " */ - sb_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer)); - /* Fall through */ - case ICMPV6_DEST_UNREACH: - case ICMPV6_PKT_TOOBIG: - case ICMPV6_TIME_EXCEED: - /* Max length: 3+maxlen */ - if (recurse) { - sb_add(m, "["); - dump_packet(m, info, skb, - ptr + sizeof(_icmp6h), 0); - sb_add(m, "] "); - } - - /* Max length: 10 "MTU=65535 " */ - if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) - sb_add(m, "MTU=%u ", ntohl(ic->icmp6_mtu)); - } - break; - } - /* Max length: 10 "PROTO=255 " */ - default: - sb_add(m, "PROTO=%u ", currenthdr); - } - - /* Max length: 15 "UID=4294967295 " */ - if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) { - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) - sb_add(m, "UID=%u GID=%u ", - skb->sk->sk_socket->file->f_cred->fsuid, - skb->sk->sk_socket->file->f_cred->fsgid); - read_unlock_bh(&skb->sk->sk_callback_lock); - } - - /* Max length: 16 "MARK=0xFFFFFFFF " */ - if (!recurse && skb->mark) - sb_add(m, "MARK=0x%x ", skb->mark); -} - -static void dump_mac_header(struct sbuff *m, - const struct nf_loginfo *info, - const struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - unsigned int logflags = 0; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - - if (!(logflags & IP6T_LOG_MACDECODE)) - goto fallback; - - switch (dev->type) { - case ARPHRD_ETHER: - sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ", - eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, - ntohs(eth_hdr(skb)->h_proto)); - return; - default: - break; - } - -fallback: - sb_add(m, "MAC="); - if (dev->hard_header_len && - skb->mac_header != skb->network_header) { - const unsigned char *p = skb_mac_header(skb); - unsigned int len = dev->hard_header_len; - unsigned int i; - - if (dev->type == ARPHRD_SIT && - (p -= ETH_HLEN) < skb->head) - p = NULL; - - if (p != NULL) { - sb_add(m, "%02x", *p++); - for (i = 1; i < len; i++) - sb_add(m, ":%02x", *p++); - } - sb_add(m, " "); - - if (dev->type == ARPHRD_SIT) { - const struct iphdr *iph = - (struct iphdr *)skb_mac_header(skb); - sb_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr); - } - } else - sb_add(m, " "); -} - -static struct nf_loginfo default_loginfo = { - .type = NF_LOG_TYPE_LOG, - .u = { - .log = { - .level = 5, - .logflags = NF_LOG_MASK, - }, - }, -}; - -static void -ip6t_log_packet(u_int8_t pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) -{ - struct sbuff *m = sb_open(); - - if (!loginfo) - loginfo = &default_loginfo; - - sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, - prefix, - in ? in->name : "", - out ? out->name : ""); - - if (in != NULL) - dump_mac_header(m, loginfo, skb); - - dump_packet(m, loginfo, skb, skb_network_offset(skb), 1); - - sb_close(m); -} - -static unsigned int -log_tg6(struct sk_buff *skb, const struct xt_action_param *par) -{ - const struct ip6t_log_info *loginfo = par->targinfo; - struct nf_loginfo li; - - li.type = NF_LOG_TYPE_LOG; - li.u.log.level = loginfo->level; - li.u.log.logflags = loginfo->logflags; - - ip6t_log_packet(NFPROTO_IPV6, par->hooknum, skb, par->in, par->out, - &li, loginfo->prefix); - return XT_CONTINUE; -} - - -static int log_tg6_check(const struct xt_tgchk_param *par) -{ - const struct ip6t_log_info *loginfo = par->targinfo; - - if (loginfo->level >= 8) { - pr_debug("level %u >= 8\n", loginfo->level); - return -EINVAL; - } - if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { - pr_debug("prefix not null-terminated\n"); - return -EINVAL; - } - return 0; -} - -static struct xt_target log_tg6_reg __read_mostly = { - .name = "LOG", - .family = NFPROTO_IPV6, - .target = log_tg6, - .targetsize = sizeof(struct ip6t_log_info), - .checkentry = log_tg6_check, - .me = THIS_MODULE, -}; - -static struct nf_logger ip6t_logger __read_mostly = { - .name = "ip6t_LOG", - .logfn = &ip6t_log_packet, - .me = THIS_MODULE, -}; - -static int __init log_tg6_init(void) -{ - int ret; - - ret = xt_register_target(&log_tg6_reg); - if (ret < 0) - return ret; - nf_log_register(NFPROTO_IPV6, &ip6t_logger); - return 0; -} - -static void __exit log_tg6_exit(void) -{ - nf_log_unregister(&ip6t_logger); - xt_unregister_target(&log_tg6_reg); -} - -module_init(log_tg6_init); -module_exit(log_tg6_exit); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f8ac4ef0b794..b895d8b13215 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -524,6 +524,15 @@ config NETFILTER_XT_TARGET_LED For more information on the LEDs available on your system, see Documentation/leds/leds-class.txt +config NETFILTER_XT_TARGET_LOG + tristate "LOG target support" + default m if NETFILTER_ADVANCED=n + help + This option adds a `LOG' target, which allows you to create rules in + any iptables table which records the packet header to the syslog. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 40f4c3d636c5..a28c2a6563f8 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -58,6 +58,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o +obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c new file mode 100644 index 000000000000..1595608a892d --- /dev/null +++ b/net/netfilter/xt_LOG.c @@ -0,0 +1,921 @@ +/* + * This is a module which is used for logging packets. + */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static struct nf_loginfo default_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 5, + .logflags = NF_LOG_MASK, + }, + }, +}; + +static int dump_udp_header(struct sbuff *m, const struct sk_buff *skb, + u8 proto, int fragment, unsigned int offset) +{ + struct udphdr _udph; + const struct udphdr *uh; + + if (proto == IPPROTO_UDP) + /* Max length: 10 "PROTO=UDP " */ + sb_add(m, "PROTO=UDP "); + else /* Max length: 14 "PROTO=UDPLITE " */ + sb_add(m, "PROTO=UDPLITE "); + + if (fragment) + goto out; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); + if (uh == NULL) { + sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); + + return 1; + } + + /* Max length: 20 "SPT=65535 DPT=65535 " */ + sb_add(m, "SPT=%u DPT=%u LEN=%u ", ntohs(uh->source), ntohs(uh->dest), + ntohs(uh->len)); + +out: + return 0; +} + +static int dump_tcp_header(struct sbuff *m, const struct sk_buff *skb, + u8 proto, int fragment, unsigned int offset, + unsigned int logflags) +{ + struct tcphdr _tcph; + const struct tcphdr *th; + + /* Max length: 10 "PROTO=TCP " */ + sb_add(m, "PROTO=TCP "); + + if (fragment) + return 0; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); + if (th == NULL) { + sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); + return 1; + } + + /* Max length: 20 "SPT=65535 DPT=65535 " */ + sb_add(m, "SPT=%u DPT=%u ", ntohs(th->source), ntohs(th->dest)); + /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ + if (logflags & XT_LOG_TCPSEQ) + sb_add(m, "SEQ=%u ACK=%u ", ntohl(th->seq), ntohl(th->ack_seq)); + + /* Max length: 13 "WINDOW=65535 " */ + sb_add(m, "WINDOW=%u ", ntohs(th->window)); + /* Max length: 9 "RES=0x3C " */ + sb_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & + TCP_RESERVED_BITS) >> 22)); + /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ + if (th->cwr) + sb_add(m, "CWR "); + if (th->ece) + sb_add(m, "ECE "); + if (th->urg) + sb_add(m, "URG "); + if (th->ack) + sb_add(m, "ACK "); + if (th->psh) + sb_add(m, "PSH "); + if (th->rst) + sb_add(m, "RST "); + if (th->syn) + sb_add(m, "SYN "); + if (th->fin) + sb_add(m, "FIN "); + /* Max length: 11 "URGP=65535 " */ + sb_add(m, "URGP=%u ", ntohs(th->urg_ptr)); + + if ((logflags & XT_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) { + u_int8_t _opt[60 - sizeof(struct tcphdr)]; + const u_int8_t *op; + unsigned int i; + unsigned int optsize = th->doff*4 - sizeof(struct tcphdr); + + op = skb_header_pointer(skb, offset + sizeof(struct tcphdr), + optsize, _opt); + if (op == NULL) { + sb_add(m, "OPT (TRUNCATED)"); + return 1; + } + + /* Max length: 127 "OPT (" 15*4*2chars ") " */ + sb_add(m, "OPT ("); + for (i = 0; i < optsize; i++) + sb_add(m, "%02X", op[i]); + + sb_add(m, ") "); + } + + return 0; +} + +/* One level of recursion won't kill us */ +static void dump_ipv4_packet(struct sbuff *m, + const struct nf_loginfo *info, + const struct sk_buff *skb, + unsigned int iphoff) +{ + struct iphdr _iph; + const struct iphdr *ih; + unsigned int logflags; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + else + logflags = NF_LOG_MASK; + + ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); + if (ih == NULL) { + sb_add(m, "TRUNCATED"); + return; + } + + /* Important fields: + * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ + /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ + sb_add(m, "SRC=%pI4 DST=%pI4 ", + &ih->saddr, &ih->daddr); + + /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ + sb_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", + ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK, + ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id)); + + /* Max length: 6 "CE DF MF " */ + if (ntohs(ih->frag_off) & IP_CE) + sb_add(m, "CE "); + if (ntohs(ih->frag_off) & IP_DF) + sb_add(m, "DF "); + if (ntohs(ih->frag_off) & IP_MF) + sb_add(m, "MF "); + + /* Max length: 11 "FRAG:65535 " */ + if (ntohs(ih->frag_off) & IP_OFFSET) + sb_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); + + if ((logflags & XT_LOG_IPOPT) && + ih->ihl * 4 > sizeof(struct iphdr)) { + const unsigned char *op; + unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; + unsigned int i, optsize; + + optsize = ih->ihl * 4 - sizeof(struct iphdr); + op = skb_header_pointer(skb, iphoff+sizeof(_iph), + optsize, _opt); + if (op == NULL) { + sb_add(m, "TRUNCATED"); + return; + } + + /* Max length: 127 "OPT (" 15*4*2chars ") " */ + sb_add(m, "OPT ("); + for (i = 0; i < optsize; i++) + sb_add(m, "%02X", op[i]); + sb_add(m, ") "); + } + + switch (ih->protocol) { + case IPPROTO_TCP: + if (dump_tcp_header(m, skb, ih->protocol, + ntohs(ih->frag_off) & IP_OFFSET, + iphoff+ih->ihl*4, logflags)) + return; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + if (dump_udp_header(m, skb, ih->protocol, + ntohs(ih->frag_off) & IP_OFFSET, + iphoff+ih->ihl*4)) + return; + case IPPROTO_ICMP: { + struct icmphdr _icmph; + const struct icmphdr *ich; + static const size_t required_len[NR_ICMP_TYPES+1] + = { [ICMP_ECHOREPLY] = 4, + [ICMP_DEST_UNREACH] + = 8 + sizeof(struct iphdr), + [ICMP_SOURCE_QUENCH] + = 8 + sizeof(struct iphdr), + [ICMP_REDIRECT] + = 8 + sizeof(struct iphdr), + [ICMP_ECHO] = 4, + [ICMP_TIME_EXCEEDED] + = 8 + sizeof(struct iphdr), + [ICMP_PARAMETERPROB] + = 8 + sizeof(struct iphdr), + [ICMP_TIMESTAMP] = 20, + [ICMP_TIMESTAMPREPLY] = 20, + [ICMP_ADDRESS] = 12, + [ICMP_ADDRESSREPLY] = 12 }; + + /* Max length: 11 "PROTO=ICMP " */ + sb_add(m, "PROTO=ICMP "); + + if (ntohs(ih->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + ich = skb_header_pointer(skb, iphoff + ih->ihl * 4, + sizeof(_icmph), &_icmph); + if (ich == NULL) { + sb_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl*4); + break; + } + + /* Max length: 18 "TYPE=255 CODE=255 " */ + sb_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code); + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (ich->type <= NR_ICMP_TYPES && + required_len[ich->type] && + skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { + sb_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl*4); + break; + } + + switch (ich->type) { + case ICMP_ECHOREPLY: + case ICMP_ECHO: + /* Max length: 19 "ID=65535 SEQ=65535 " */ + sb_add(m, "ID=%u SEQ=%u ", + ntohs(ich->un.echo.id), + ntohs(ich->un.echo.sequence)); + break; + + case ICMP_PARAMETERPROB: + /* Max length: 14 "PARAMETER=255 " */ + sb_add(m, "PARAMETER=%u ", + ntohl(ich->un.gateway) >> 24); + break; + case ICMP_REDIRECT: + /* Max length: 24 "GATEWAY=255.255.255.255 " */ + sb_add(m, "GATEWAY=%pI4 ", &ich->un.gateway); + /* Fall through */ + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_TIME_EXCEEDED: + /* Max length: 3+maxlen */ + if (!iphoff) { /* Only recurse once. */ + sb_add(m, "["); + dump_ipv4_packet(m, info, skb, + iphoff + ih->ihl*4+sizeof(_icmph)); + sb_add(m, "] "); + } + + /* Max length: 10 "MTU=65535 " */ + if (ich->type == ICMP_DEST_UNREACH && + ich->code == ICMP_FRAG_NEEDED) + sb_add(m, "MTU=%u ", ntohs(ich->un.frag.mtu)); + } + break; + } + /* Max Length */ + case IPPROTO_AH: { + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; + + if (ntohs(ih->frag_off) & IP_OFFSET) + break; + + /* Max length: 9 "PROTO=AH " */ + sb_add(m, "PROTO=AH "); + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + ah = skb_header_pointer(skb, iphoff+ih->ihl*4, + sizeof(_ahdr), &_ahdr); + if (ah == NULL) { + sb_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl*4); + break; + } + + /* Length: 15 "SPI=0xF1234567 " */ + sb_add(m, "SPI=0x%x ", ntohl(ah->spi)); + break; + } + case IPPROTO_ESP: { + struct ip_esp_hdr _esph; + const struct ip_esp_hdr *eh; + + /* Max length: 10 "PROTO=ESP " */ + sb_add(m, "PROTO=ESP "); + + if (ntohs(ih->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + eh = skb_header_pointer(skb, iphoff+ih->ihl*4, + sizeof(_esph), &_esph); + if (eh == NULL) { + sb_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl*4); + break; + } + + /* Length: 15 "SPI=0xF1234567 " */ + sb_add(m, "SPI=0x%x ", ntohl(eh->spi)); + break; + } + /* Max length: 10 "PROTO 255 " */ + default: + sb_add(m, "PROTO=%u ", ih->protocol); + } + + /* Max length: 15 "UID=4294967295 " */ + if ((logflags & XT_LOG_UID) && !iphoff && skb->sk) { + read_lock_bh(&skb->sk->sk_callback_lock); + if (skb->sk->sk_socket && skb->sk->sk_socket->file) + sb_add(m, "UID=%u GID=%u ", + skb->sk->sk_socket->file->f_cred->fsuid, + skb->sk->sk_socket->file->f_cred->fsgid); + read_unlock_bh(&skb->sk->sk_callback_lock); + } + + /* Max length: 16 "MARK=0xFFFFFFFF " */ + if (!iphoff && skb->mark) + sb_add(m, "MARK=0x%x ", skb->mark); + + /* Proto Max log string length */ + /* IP: 40+46+6+11+127 = 230 */ + /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ + /* UDP: 10+max(25,20) = 35 */ + /* UDPLITE: 14+max(25,20) = 39 */ + /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ + /* ESP: 10+max(25)+15 = 50 */ + /* AH: 9+max(25)+15 = 49 */ + /* unknown: 10 */ + + /* (ICMP allows recursion one level deep) */ + /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ + /* maxlen = 230+ 91 + 230 + 252 = 803 */ +} + +static void dump_ipv4_mac_header(struct sbuff *m, + const struct nf_loginfo *info, + const struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + unsigned int logflags = 0; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + + if (!(logflags & XT_LOG_MACDECODE)) + goto fallback; + + switch (dev->type) { + case ARPHRD_ETHER: + sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + ntohs(eth_hdr(skb)->h_proto)); + return; + default: + break; + } + +fallback: + sb_add(m, "MAC="); + if (dev->hard_header_len && + skb->mac_header != skb->network_header) { + const unsigned char *p = skb_mac_header(skb); + unsigned int i; + + sb_add(m, "%02x", *p++); + for (i = 1; i < dev->hard_header_len; i++, p++) + sb_add(m, ":%02x", *p); + } + sb_add(m, " "); +} + +static void +log_packet_common(struct sbuff *m, + u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, + prefix, + in ? in->name : "", + out ? out->name : ""); +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge) { + const struct net_device *physindev; + const struct net_device *physoutdev; + + physindev = skb->nf_bridge->physindev; + if (physindev && in != physindev) + sb_add(m, "PHYSIN=%s ", physindev->name); + physoutdev = skb->nf_bridge->physoutdev; + if (physoutdev && out != physoutdev) + sb_add(m, "PHYSOUT=%s ", physoutdev->name); + } +#endif +} + + +static void +ipt_log_packet(u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + struct sbuff *m = sb_open(); + + if (!loginfo) + loginfo = &default_loginfo; + + log_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix); + + if (in != NULL) + dump_ipv4_mac_header(m, loginfo, skb); + + dump_ipv4_packet(m, loginfo, skb, 0); + + sb_close(m); +} + +#if IS_ENABLED(CONFIG_IPV6) +/* One level of recursion won't kill us */ +static void dump_ipv6_packet(struct sbuff *m, + const struct nf_loginfo *info, + const struct sk_buff *skb, unsigned int ip6hoff, + int recurse) +{ + u_int8_t currenthdr; + int fragment; + struct ipv6hdr _ip6h; + const struct ipv6hdr *ih; + unsigned int ptr; + unsigned int hdrlen = 0; + unsigned int logflags; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + else + logflags = NF_LOG_MASK; + + ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); + if (ih == NULL) { + sb_add(m, "TRUNCATED"); + return; + } + + /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ + sb_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); + + /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ + sb_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", + ntohs(ih->payload_len) + sizeof(struct ipv6hdr), + (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, + ih->hop_limit, + (ntohl(*(__be32 *)ih) & 0x000fffff)); + + fragment = 0; + ptr = ip6hoff + sizeof(struct ipv6hdr); + currenthdr = ih->nexthdr; + while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) { + struct ipv6_opt_hdr _hdr; + const struct ipv6_opt_hdr *hp; + + hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); + if (hp == NULL) { + sb_add(m, "TRUNCATED"); + return; + } + + /* Max length: 48 "OPT (...) " */ + if (logflags & XT_LOG_IPOPT) + sb_add(m, "OPT ( "); + + switch (currenthdr) { + case IPPROTO_FRAGMENT: { + struct frag_hdr _fhdr; + const struct frag_hdr *fh; + + sb_add(m, "FRAG:"); + fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), + &_fhdr); + if (fh == NULL) { + sb_add(m, "TRUNCATED "); + return; + } + + /* Max length: 6 "65535 " */ + sb_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8); + + /* Max length: 11 "INCOMPLETE " */ + if (fh->frag_off & htons(0x0001)) + sb_add(m, "INCOMPLETE "); + + sb_add(m, "ID:%08x ", ntohl(fh->identification)); + + if (ntohs(fh->frag_off) & 0xFFF8) + fragment = 1; + + hdrlen = 8; + + break; + } + case IPPROTO_DSTOPTS: + case IPPROTO_ROUTING: + case IPPROTO_HOPOPTS: + if (fragment) { + if (logflags & XT_LOG_IPOPT) + sb_add(m, ")"); + return; + } + hdrlen = ipv6_optlen(hp); + break; + /* Max Length */ + case IPPROTO_AH: + if (logflags & XT_LOG_IPOPT) { + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; + + /* Max length: 3 "AH " */ + sb_add(m, "AH "); + + if (fragment) { + sb_add(m, ")"); + return; + } + + ah = skb_header_pointer(skb, ptr, sizeof(_ahdr), + &_ahdr); + if (ah == NULL) { + /* + * Max length: 26 "INCOMPLETE [65535 + * bytes] )" + */ + sb_add(m, "INCOMPLETE [%u bytes] )", + skb->len - ptr); + return; + } + + /* Length: 15 "SPI=0xF1234567 */ + sb_add(m, "SPI=0x%x ", ntohl(ah->spi)); + + } + + hdrlen = (hp->hdrlen+2)<<2; + break; + case IPPROTO_ESP: + if (logflags & XT_LOG_IPOPT) { + struct ip_esp_hdr _esph; + const struct ip_esp_hdr *eh; + + /* Max length: 4 "ESP " */ + sb_add(m, "ESP "); + + if (fragment) { + sb_add(m, ")"); + return; + } + + /* + * Max length: 26 "INCOMPLETE [65535 bytes] )" + */ + eh = skb_header_pointer(skb, ptr, sizeof(_esph), + &_esph); + if (eh == NULL) { + sb_add(m, "INCOMPLETE [%u bytes] )", + skb->len - ptr); + return; + } + + /* Length: 16 "SPI=0xF1234567 )" */ + sb_add(m, "SPI=0x%x )", ntohl(eh->spi)); + + } + return; + default: + /* Max length: 20 "Unknown Ext Hdr 255" */ + sb_add(m, "Unknown Ext Hdr %u", currenthdr); + return; + } + if (logflags & XT_LOG_IPOPT) + sb_add(m, ") "); + + currenthdr = hp->nexthdr; + ptr += hdrlen; + } + + switch (currenthdr) { + case IPPROTO_TCP: + if (dump_tcp_header(m, skb, currenthdr, fragment, ptr, + logflags)) + return; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + if (dump_udp_header(m, skb, currenthdr, fragment, ptr)) + return; + case IPPROTO_ICMPV6: { + struct icmp6hdr _icmp6h; + const struct icmp6hdr *ic; + + /* Max length: 13 "PROTO=ICMPv6 " */ + sb_add(m, "PROTO=ICMPv6 "); + + if (fragment) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h); + if (ic == NULL) { + sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr); + return; + } + + /* Max length: 18 "TYPE=255 CODE=255 " */ + sb_add(m, "TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code); + + switch (ic->icmp6_type) { + case ICMPV6_ECHO_REQUEST: + case ICMPV6_ECHO_REPLY: + /* Max length: 19 "ID=65535 SEQ=65535 " */ + sb_add(m, "ID=%u SEQ=%u ", + ntohs(ic->icmp6_identifier), + ntohs(ic->icmp6_sequence)); + break; + case ICMPV6_MGM_QUERY: + case ICMPV6_MGM_REPORT: + case ICMPV6_MGM_REDUCTION: + break; + + case ICMPV6_PARAMPROB: + /* Max length: 17 "POINTER=ffffffff " */ + sb_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer)); + /* Fall through */ + case ICMPV6_DEST_UNREACH: + case ICMPV6_PKT_TOOBIG: + case ICMPV6_TIME_EXCEED: + /* Max length: 3+maxlen */ + if (recurse) { + sb_add(m, "["); + dump_ipv6_packet(m, info, skb, + ptr + sizeof(_icmp6h), 0); + sb_add(m, "] "); + } + + /* Max length: 10 "MTU=65535 " */ + if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) + sb_add(m, "MTU=%u ", ntohl(ic->icmp6_mtu)); + } + break; + } + /* Max length: 10 "PROTO=255 " */ + default: + sb_add(m, "PROTO=%u ", currenthdr); + } + + /* Max length: 15 "UID=4294967295 " */ + if ((logflags & XT_LOG_UID) && recurse && skb->sk) { + read_lock_bh(&skb->sk->sk_callback_lock); + if (skb->sk->sk_socket && skb->sk->sk_socket->file) + sb_add(m, "UID=%u GID=%u ", + skb->sk->sk_socket->file->f_cred->fsuid, + skb->sk->sk_socket->file->f_cred->fsgid); + read_unlock_bh(&skb->sk->sk_callback_lock); + } + + /* Max length: 16 "MARK=0xFFFFFFFF " */ + if (!recurse && skb->mark) + sb_add(m, "MARK=0x%x ", skb->mark); +} + +static void dump_ipv6_mac_header(struct sbuff *m, + const struct nf_loginfo *info, + const struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + unsigned int logflags = 0; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + + if (!(logflags & XT_LOG_MACDECODE)) + goto fallback; + + switch (dev->type) { + case ARPHRD_ETHER: + sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + ntohs(eth_hdr(skb)->h_proto)); + return; + default: + break; + } + +fallback: + sb_add(m, "MAC="); + if (dev->hard_header_len && + skb->mac_header != skb->network_header) { + const unsigned char *p = skb_mac_header(skb); + unsigned int len = dev->hard_header_len; + unsigned int i; + + if (dev->type == ARPHRD_SIT) { + p -= ETH_HLEN; + + if (p < skb->head) + p = NULL; + } + + if (p != NULL) { + sb_add(m, "%02x", *p++); + for (i = 1; i < len; i++) + sb_add(m, ":%02x", *p++); + } + sb_add(m, " "); + + if (dev->type == ARPHRD_SIT) { + const struct iphdr *iph = + (struct iphdr *)skb_mac_header(skb); + sb_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, + &iph->daddr); + } + } else + sb_add(m, " "); +} + +static void +ip6t_log_packet(u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + struct sbuff *m = sb_open(); + + if (!loginfo) + loginfo = &default_loginfo; + + log_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix); + + if (in != NULL) + dump_ipv6_mac_header(m, loginfo, skb); + + dump_ipv6_packet(m, loginfo, skb, skb_network_offset(skb), 1); + + sb_close(m); +} +#endif + +static unsigned int +log_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_log_info *loginfo = par->targinfo; + struct nf_loginfo li; + + li.type = NF_LOG_TYPE_LOG; + li.u.log.level = loginfo->level; + li.u.log.logflags = loginfo->logflags; + + if (par->family == NFPROTO_IPV4) + ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, + par->out, &li, loginfo->prefix); +#if IS_ENABLED(CONFIG_IPV6) + else if (par->family == NFPROTO_IPV6) + ip6t_log_packet(NFPROTO_IPV6, par->hooknum, skb, par->in, + par->out, &li, loginfo->prefix); +#endif + else + WARN_ON_ONCE(1); + + return XT_CONTINUE; +} + +static int log_tg_check(const struct xt_tgchk_param *par) +{ + const struct xt_log_info *loginfo = par->targinfo; + + if (par->family != NFPROTO_IPV4 && par->family != NFPROTO_IPV6) + return -EINVAL; + + if (loginfo->level >= 8) { + pr_debug("level %u >= 8\n", loginfo->level); + return -EINVAL; + } + + if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { + pr_debug("prefix is not null-terminated\n"); + return -EINVAL; + } + + return 0; +} + +static struct xt_target log_tg_regs[] __read_mostly = { + { + .name = "LOG", + .family = NFPROTO_IPV4, + .target = log_tg, + .targetsize = sizeof(struct xt_log_info), + .checkentry = log_tg_check, + .me = THIS_MODULE, + }, +#if IS_ENABLED(CONFIG_IPV6) + { + .name = "LOG", + .family = NFPROTO_IPV6, + .target = log_tg, + .targetsize = sizeof(struct xt_log_info), + .checkentry = log_tg_check, + .me = THIS_MODULE, + }, +#endif +}; + +static struct nf_logger ipt_log_logger __read_mostly = { + .name = "ipt_LOG", + .logfn = &ipt_log_packet, + .me = THIS_MODULE, +}; + +#if IS_ENABLED(CONFIG_IPV6) +static struct nf_logger ip6t_log_logger __read_mostly = { + .name = "ip6t_LOG", + .logfn = &ip6t_log_packet, + .me = THIS_MODULE, +}; +#endif + +static int __init log_tg_init(void) +{ + int ret; + + ret = xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); + if (ret < 0) + return ret; + + nf_log_register(NFPROTO_IPV4, &ipt_log_logger); +#if IS_ENABLED(CONFIG_IPV6) + nf_log_register(NFPROTO_IPV6, &ip6t_log_logger); +#endif + return 0; +} + +static void __exit log_tg_exit(void) +{ + nf_log_unregister(&ipt_log_logger); +#if IS_ENABLED(CONFIG_IPV6) + nf_log_unregister(&ip6t_log_logger); +#endif + xt_unregister_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); +} + +module_init(log_tg_init); +module_exit(log_tg_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Netfilter Core Team "); +MODULE_AUTHOR("Jan Rekorajski "); +MODULE_DESCRIPTION("Xtables: IPv4/IPv6 packet logging"); +MODULE_ALIAS("ipt_LOG"); +MODULE_ALIAS("ip6t_LOG"); -- cgit v1.2.3 From 2c8503f55fbdfbeff4164f133df804cf4d316290 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 28 Feb 2012 18:23:31 +0100 Subject: netfilter: nf_conntrack: pass timeout array to l4->new and l4->packet This patch defines a new interface for l4 protocol trackers: unsigned int *(*get_timeouts)(struct net *net); that is used to return the array of unsigned int that contains the timeouts that will be applied for this flow. This is passed to the l4proto->new(...) and l4proto->packet(...) functions to specify the timeout policy. This interface allows per-net global timeout configuration (although only DCCP supports this by now) and it will allow custom custom timeout configuration by means of follow-up patches. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 8 +++++-- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 13 +++++++++--- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 13 +++++++++--- net/netfilter/nf_conntrack_core.c | 18 ++++++++++------ net/netfilter/nf_conntrack_proto_dccp.c | 16 +++++++++----- net/netfilter/nf_conntrack_proto_generic.c | 29 ++++++++++++++++---------- net/netfilter/nf_conntrack_proto_gre.c | 15 +++++++++---- net/netfilter/nf_conntrack_proto_sctp.c | 14 ++++++++++--- net/netfilter/nf_conntrack_proto_tcp.c | 22 ++++++++++++------- net/netfilter/nf_conntrack_proto_udp.c | 16 ++++++++++---- net/netfilter/nf_conntrack_proto_udplite.c | 16 ++++++++++---- 11 files changed, 128 insertions(+), 52 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index e3d3ee3c06a2..c48b67405aa0 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -39,12 +39,13 @@ struct nf_conntrack_l4proto { unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum); + unsigned int hooknum, + unsigned int *timeouts); /* Called when a new connection for this protocol found; * returns TRUE if it's OK. If so, packet() called next. */ bool (*new)(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff); + unsigned int dataoff, unsigned int *timeouts); /* Called when a conntrack entry is destroyed */ void (*destroy)(struct nf_conn *ct); @@ -60,6 +61,9 @@ struct nf_conntrack_l4proto { /* Print out the private part of the conntrack. */ int (*print_conntrack)(struct seq_file *s, struct nf_conn *); + /* Return the array of timeouts for this protocol. */ + unsigned int *(*get_timeouts)(struct net *net); + /* convert protoinfo to nfnetink attributes */ int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla, struct nf_conn *ct); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index ab5b27a2916f..6b801124b31f 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -75,25 +75,31 @@ static int icmp_print_tuple(struct seq_file *s, ntohs(tuple->src.u.icmp.id)); } +static unsigned int *icmp_get_timeouts(struct net *net) +{ + return &nf_ct_icmp_timeout; +} + /* Returns verdict for packet, or -1 for invalid. */ static int icmp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum) + unsigned int hooknum, + unsigned int *timeout) { /* Do not immediately delete the connection after the first successful reply to avoid excessive conntrackd traffic and also to handle correctly ICMP echo reply duplicates. */ - nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); + nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) + unsigned int dataoff, unsigned int *timeouts) { static const u_int8_t valid_new[] = { [ICMP_ECHO] = 1, @@ -298,6 +304,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = .invert_tuple = icmp_invert_tuple, .print_tuple = icmp_print_tuple, .packet = icmp_packet, + .get_timeouts = icmp_get_timeouts, .new = icmp_new, .error = icmp_error, .destroy = NULL, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 7c05e7eacbc6..2eb9751eb7a8 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -88,25 +88,31 @@ static int icmpv6_print_tuple(struct seq_file *s, ntohs(tuple->src.u.icmp.id)); } +static unsigned int *icmpv6_get_timeouts(struct net *net) +{ + return &nf_ct_icmpv6_timeout; +} + /* Returns verdict for packet, or -1 for invalid. */ static int icmpv6_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum) + unsigned int hooknum, + unsigned int *timeout) { /* Do not immediately delete the connection after the first successful reply to avoid excessive conntrackd traffic and also to handle correctly ICMP echo reply duplicates. */ - nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout); + nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) + unsigned int dataoff, unsigned int *timeouts) { static const u_int8_t valid_new[] = { [ICMPV6_ECHO_REQUEST - 128] = 1, @@ -293,6 +299,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .invert_tuple = icmpv6_invert_tuple, .print_tuple = icmpv6_print_tuple, .packet = icmpv6_packet, + .get_timeouts = icmpv6_get_timeouts, .new = icmpv6_new, .error = icmpv6_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ed86a3be678e..d18995eea1c6 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -763,7 +763,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, struct sk_buff *skb, - unsigned int dataoff, u32 hash) + unsigned int dataoff, u32 hash, + unsigned int *timeouts) { struct nf_conn *ct; struct nf_conn_help *help; @@ -782,7 +783,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (IS_ERR(ct)) return (struct nf_conntrack_tuple_hash *)ct; - if (!l4proto->new(ct, skb, dataoff)) { + if (!l4proto->new(ct, skb, dataoff, timeouts)) { nf_conntrack_free(ct); pr_debug("init conntrack: can't track with proto module\n"); return NULL; @@ -848,7 +849,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, int *set_reply, - enum ip_conntrack_info *ctinfo) + enum ip_conntrack_info *ctinfo, + unsigned int *timeouts) { struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; @@ -868,7 +870,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, h = __nf_conntrack_find_get(net, zone, &tuple, hash); if (!h) { h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, - skb, dataoff, hash); + skb, dataoff, hash, timeouts); if (!h) return NULL; if (IS_ERR(h)) @@ -909,6 +911,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, enum ip_conntrack_info ctinfo; struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; + unsigned int *timeouts; unsigned int dataoff; u_int8_t protonum; int set_reply = 0; @@ -955,8 +958,11 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, goto out; } + timeouts = l4proto->get_timeouts(net); + ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, - l3proto, l4proto, &set_reply, &ctinfo); + l3proto, l4proto, &set_reply, &ctinfo, + timeouts); if (!ct) { /* Not valid part of a connection */ NF_CT_STAT_INC_ATOMIC(net, invalid); @@ -973,7 +979,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, NF_CT_ASSERT(skb->nfct); - ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum); + ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts); if (ret <= 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index d6dde6dc09e6..8ea33598a0a7 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -423,7 +423,7 @@ static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv, } static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) + unsigned int dataoff, unsigned int *timeouts) { struct net *net = nf_ct_net(ct); struct dccp_net *dn; @@ -472,12 +472,17 @@ static u64 dccp_ack_seq(const struct dccp_hdr *dh) ntohl(dhack->dccph_ack_nr_low); } +static unsigned int *dccp_get_timeouts(struct net *net) +{ + return dccp_pernet(net)->dccp_timeout; +} + static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int hooknum) + u_int8_t pf, unsigned int hooknum, + unsigned int *timeouts) { struct net *net = nf_ct_net(ct); - struct dccp_net *dn; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; u_int8_t type, old_state, new_state; @@ -559,8 +564,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); - dn = dccp_pernet(net); - nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]); + nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); return NF_ACCEPT; } @@ -767,6 +771,7 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = { .invert_tuple = dccp_invert_tuple, .new = dccp_new, .packet = dccp_packet, + .get_timeouts = dccp_get_timeouts, .error = dccp_error, .print_tuple = dccp_print_tuple, .print_conntrack = dccp_print_conntrack, @@ -789,6 +794,7 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { .invert_tuple = dccp_invert_tuple, .new = dccp_new, .packet = dccp_packet, + .get_timeouts = dccp_get_timeouts, .error = dccp_error, .print_tuple = dccp_print_tuple, .print_conntrack = dccp_print_conntrack, diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index e2091d0c7a2f..0e6c5451db80 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -40,21 +40,27 @@ static int generic_print_tuple(struct seq_file *s, return 0; } +static unsigned int *generic_get_timeouts(struct net *net) +{ + return &nf_ct_generic_timeout; +} + /* Returns verdict for packet, or -1 for invalid. */ -static int packet(struct nf_conn *ct, - const struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - u_int8_t pf, - unsigned int hooknum) +static int generic_packet(struct nf_conn *ct, + const struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + u_int8_t pf, + unsigned int hooknum, + unsigned int *timeout) { - nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_generic_timeout); + nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ -static bool new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) +static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb, + unsigned int dataoff, unsigned int *timeouts) { return true; } @@ -93,8 +99,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = .pkt_to_tuple = generic_pkt_to_tuple, .invert_tuple = generic_invert_tuple, .print_tuple = generic_print_tuple, - .packet = packet, - .new = new, + .packet = generic_packet, + .get_timeouts = generic_get_timeouts, + .new = generic_new, #ifdef CONFIG_SYSCTL .ctl_table_header = &generic_sysctl_header, .ctl_table = generic_sysctl_table, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 8144f22d159a..1bf01c95658b 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -235,13 +235,19 @@ static int gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) (ct->proto.gre.stream_timeout / HZ)); } +static unsigned int *gre_get_timeouts(struct net *net) +{ + return gre_timeouts; +} + /* Returns verdict for packet, and may modify conntrack */ static int gre_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum) + unsigned int hooknum, + unsigned int *timeouts) { /* If we've seen traffic both ways, this is a GRE connection. * Extend timeout. */ @@ -260,15 +266,15 @@ static int gre_packet(struct nf_conn *ct, /* Called when a new connection for this protocol found. */ static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) + unsigned int dataoff, unsigned int *timeouts) { pr_debug(": "); nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); /* initialize to sane value. Ideally a conntrack helper * (e.g. in case of pptp) is increasing them */ - ct->proto.gre.stream_timeout = gre_timeouts[GRE_CT_REPLIED]; - ct->proto.gre.timeout = gre_timeouts[GRE_CT_UNREPLIED]; + ct->proto.gre.stream_timeout = timeouts[GRE_CT_REPLIED]; + ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED]; return true; } @@ -295,6 +301,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { .invert_tuple = gre_invert_tuple, .print_tuple = gre_print_tuple, .print_conntrack = gre_print_conntrack, + .get_timeouts = gre_get_timeouts, .packet = gre_packet, .new = gre_new, .destroy = gre_destroy, diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index afa69136061a..2a0703371e24 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -279,13 +279,19 @@ static int sctp_new_state(enum ip_conntrack_dir dir, return sctp_conntracks[dir][i][cur_state]; } +static unsigned int *sctp_get_timeouts(struct net *net) +{ + return sctp_timeouts; +} + /* Returns verdict for packet, or -NF_ACCEPT for invalid. */ static int sctp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum) + unsigned int hooknum, + unsigned int *timeouts) { enum sctp_conntrack new_state, old_state; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); @@ -370,7 +376,7 @@ static int sctp_packet(struct nf_conn *ct, } spin_unlock_bh(&ct->lock); - nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]); + nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED && dir == IP_CT_DIR_REPLY && @@ -390,7 +396,7 @@ out: /* Called when a new connection for this protocol found. */ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) + unsigned int dataoff, unsigned int *timeouts) { enum sctp_conntrack new_state; const struct sctphdr *sh; @@ -664,6 +670,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .print_tuple = sctp_print_tuple, .print_conntrack = sctp_print_conntrack, .packet = sctp_packet, + .get_timeouts = sctp_get_timeouts, .new = sctp_new, .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) @@ -694,6 +701,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .print_tuple = sctp_print_tuple, .print_conntrack = sctp_print_conntrack, .packet = sctp_packet, + .get_timeouts = sctp_get_timeouts, .new = sctp_new, .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 57c778546094..8372bb43feb0 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -813,13 +813,19 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, return NF_ACCEPT; } +static unsigned int *tcp_get_timeouts(struct net *net) +{ + return tcp_timeouts; +} + /* Returns verdict for packet, or -1 for invalid. */ static int tcp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum) + unsigned int hooknum, + unsigned int *timeouts) { struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple *tuple; @@ -1014,14 +1020,14 @@ static int tcp_packet(struct nf_conn *ct, ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans && - tcp_timeouts[new_state] > tcp_timeouts[TCP_CONNTRACK_RETRANS]) - timeout = tcp_timeouts[TCP_CONNTRACK_RETRANS]; + timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) + timeout = timeouts[TCP_CONNTRACK_RETRANS]; else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && - tcp_timeouts[new_state] > tcp_timeouts[TCP_CONNTRACK_UNACK]) - timeout = tcp_timeouts[TCP_CONNTRACK_UNACK]; + timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) + timeout = timeouts[TCP_CONNTRACK_UNACK]; else - timeout = tcp_timeouts[new_state]; + timeout = timeouts[new_state]; spin_unlock_bh(&ct->lock); if (new_state != old_state) @@ -1053,7 +1059,7 @@ static int tcp_packet(struct nf_conn *ct, /* Called when a new connection for this protocol found. */ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) + unsigned int dataoff, unsigned int *timeouts) { enum tcp_conntrack new_state; const struct tcphdr *th; @@ -1444,6 +1450,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = .print_tuple = tcp_print_tuple, .print_conntrack = tcp_print_conntrack, .packet = tcp_packet, + .get_timeouts = tcp_get_timeouts, .new = tcp_new, .error = tcp_error, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) @@ -1476,6 +1483,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = .print_tuple = tcp_print_tuple, .print_conntrack = tcp_print_conntrack, .packet = tcp_packet, + .get_timeouts = tcp_get_timeouts, .new = tcp_new, .error = tcp_error, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 5b24ff882f95..70e005992d5b 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -71,32 +71,38 @@ static int udp_print_tuple(struct seq_file *s, ntohs(tuple->dst.u.udp.port)); } +static unsigned int *udp_get_timeouts(struct net *net) +{ + return udp_timeouts; +} + /* Returns verdict for packet, and may modify conntracktype */ static int udp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum) + unsigned int hooknum, + unsigned int *timeouts) { /* If we've seen traffic both ways, this is some kind of UDP stream. Extend timeout. */ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { nf_ct_refresh_acct(ct, ctinfo, skb, - udp_timeouts[UDP_CT_REPLIED]); + timeouts[UDP_CT_REPLIED]); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); } else { nf_ct_refresh_acct(ct, ctinfo, skb, - udp_timeouts[UDP_CT_UNREPLIED]); + timeouts[UDP_CT_UNREPLIED]); } return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) + unsigned int dataoff, unsigned int *timeouts) { return true; } @@ -196,6 +202,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = .invert_tuple = udp_invert_tuple, .print_tuple = udp_print_tuple, .packet = udp_packet, + .get_timeouts = udp_get_timeouts, .new = udp_new, .error = udp_error, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) @@ -224,6 +231,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .invert_tuple = udp_invert_tuple, .print_tuple = udp_print_tuple, .packet = udp_packet, + .get_timeouts = udp_get_timeouts, .new = udp_new, .error = udp_error, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index e73071743e01..0b32ccb1d515 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -68,32 +68,38 @@ static int udplite_print_tuple(struct seq_file *s, ntohs(tuple->dst.u.udp.port)); } +static unsigned int *udplite_get_timeouts(struct net *net) +{ + return udplite_timeouts; +} + /* Returns verdict for packet, and may modify conntracktype */ static int udplite_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum) + unsigned int hooknum, + unsigned int *timeouts) { /* If we've seen traffic both ways, this is some kind of UDP stream. Extend timeout. */ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { nf_ct_refresh_acct(ct, ctinfo, skb, - udplite_timeouts[UDPLITE_CT_REPLIED]); + timeouts[UDPLITE_CT_REPLIED]); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); } else { nf_ct_refresh_acct(ct, ctinfo, skb, - udplite_timeouts[UDPLITE_CT_UNREPLIED]); + timeouts[UDPLITE_CT_UNREPLIED]); } return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) + unsigned int dataoff, unsigned int *timeouts) { return true; } @@ -181,6 +187,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = .invert_tuple = udplite_invert_tuple, .print_tuple = udplite_print_tuple, .packet = udplite_packet, + .get_timeouts = udplite_get_timeouts, .new = udplite_new, .error = udplite_error, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) @@ -205,6 +212,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .invert_tuple = udplite_invert_tuple, .print_tuple = udplite_print_tuple, .packet = udplite_packet, + .get_timeouts = udplite_get_timeouts, .new = udplite_new, .error = udplite_error, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) -- cgit v1.2.3 From 50978462300f74dc48aea4a38471cb69bdf741a5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 28 Feb 2012 19:13:48 +0100 Subject: netfilter: add cttimeout infrastructure for fine timeout tuning This patch adds the infrastructure to add fine timeout tuning over nfnetlink. Now you can use the NFNL_SUBSYS_CTNETLINK_TIMEOUT subsystem to create/delete/dump timeout objects that contain some specific timeout policy for one flow. The follow up patches will allow you attach timeout policy object to conntrack via the CT target and the conntrack extension infrastructure. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/nfnetlink.h | 3 +- include/linux/netfilter/nfnetlink_cttimeout.h | 114 +++++++ include/net/netfilter/nf_conntrack_l4proto.h | 11 + net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 47 +++ net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 47 +++ net/netfilter/Kconfig | 11 + net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_proto_dccp.c | 70 +++++ net/netfilter/nf_conntrack_proto_generic.c | 48 +++ net/netfilter/nf_conntrack_proto_gre.c | 55 ++++ net/netfilter/nf_conntrack_proto_sctp.c | 69 +++++ net/netfilter/nf_conntrack_proto_tcp.c | 127 ++++++++ net/netfilter/nf_conntrack_proto_udp.c | 64 ++++ net/netfilter/nf_conntrack_proto_udplite.c | 66 ++++ net/netfilter/nfnetlink_cttimeout.c | 398 +++++++++++++++++++++++++ 16 files changed, 1131 insertions(+), 1 deletion(-) create mode 100644 include/linux/netfilter/nfnetlink_cttimeout.h create mode 100644 net/netfilter/nfnetlink_cttimeout.c (limited to 'net/ipv4') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index aaa72aaa21de..1697036336b6 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -10,6 +10,7 @@ header-y += nfnetlink.h header-y += nfnetlink_acct.h header-y += nfnetlink_compat.h header-y += nfnetlink_conntrack.h +header-y += nfnetlink_cttimeout.h header-y += nfnetlink_log.h header-y += nfnetlink_queue.h header-y += x_tables.h diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index b64454c2f79f..6fd1f0d07e64 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -49,7 +49,8 @@ struct nfgenmsg { #define NFNL_SUBSYS_OSF 5 #define NFNL_SUBSYS_IPSET 6 #define NFNL_SUBSYS_ACCT 7 -#define NFNL_SUBSYS_COUNT 8 +#define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 +#define NFNL_SUBSYS_COUNT 9 #ifdef __KERNEL__ diff --git a/include/linux/netfilter/nfnetlink_cttimeout.h b/include/linux/netfilter/nfnetlink_cttimeout.h new file mode 100644 index 000000000000..a2810a7c5e30 --- /dev/null +++ b/include/linux/netfilter/nfnetlink_cttimeout.h @@ -0,0 +1,114 @@ +#ifndef _CTTIMEOUT_NETLINK_H +#define _CTTIMEOUT_NETLINK_H +#include + +enum ctnl_timeout_msg_types { + IPCTNL_MSG_TIMEOUT_NEW, + IPCTNL_MSG_TIMEOUT_GET, + IPCTNL_MSG_TIMEOUT_DELETE, + + IPCTNL_MSG_TIMEOUT_MAX +}; + +enum ctattr_timeout { + CTA_TIMEOUT_UNSPEC, + CTA_TIMEOUT_NAME, + CTA_TIMEOUT_L3PROTO, + CTA_TIMEOUT_L4PROTO, + CTA_TIMEOUT_DATA, + CTA_TIMEOUT_USE, + __CTA_TIMEOUT_MAX +}; +#define CTA_TIMEOUT_MAX (__CTA_TIMEOUT_MAX - 1) + +enum ctattr_timeout_generic { + CTA_TIMEOUT_GENERIC_UNSPEC, + CTA_TIMEOUT_GENERIC_TIMEOUT, + __CTA_TIMEOUT_GENERIC_MAX +}; +#define CTA_TIMEOUT_GENERIC_MAX (__CTA_TIMEOUT_GENERIC_MAX - 1) + +enum ctattr_timeout_tcp { + CTA_TIMEOUT_TCP_UNSPEC, + CTA_TIMEOUT_TCP_SYN_SENT, + CTA_TIMEOUT_TCP_SYN_RECV, + CTA_TIMEOUT_TCP_ESTABLISHED, + CTA_TIMEOUT_TCP_FIN_WAIT, + CTA_TIMEOUT_TCP_CLOSE_WAIT, + CTA_TIMEOUT_TCP_LAST_ACK, + CTA_TIMEOUT_TCP_TIME_WAIT, + CTA_TIMEOUT_TCP_CLOSE, + CTA_TIMEOUT_TCP_SYN_SENT2, + CTA_TIMEOUT_TCP_RETRANS, + CTA_TIMEOUT_TCP_UNACK, + __CTA_TIMEOUT_TCP_MAX +}; +#define CTA_TIMEOUT_TCP_MAX (__CTA_TIMEOUT_TCP_MAX - 1) + +enum ctattr_timeout_udp { + CTA_TIMEOUT_UDP_UNSPEC, + CTA_TIMEOUT_UDP_UNREPLIED, + CTA_TIMEOUT_UDP_REPLIED, + __CTA_TIMEOUT_UDP_MAX +}; +#define CTA_TIMEOUT_UDP_MAX (__CTA_TIMEOUT_UDP_MAX - 1) + +enum ctattr_timeout_udplite { + CTA_TIMEOUT_UDPLITE_UNSPEC, + CTA_TIMEOUT_UDPLITE_UNREPLIED, + CTA_TIMEOUT_UDPLITE_REPLIED, + __CTA_TIMEOUT_UDPLITE_MAX +}; +#define CTA_TIMEOUT_UDPLITE_MAX (__CTA_TIMEOUT_UDPLITE_MAX - 1) + +enum ctattr_timeout_icmp { + CTA_TIMEOUT_ICMP_UNSPEC, + CTA_TIMEOUT_ICMP_TIMEOUT, + __CTA_TIMEOUT_ICMP_MAX +}; +#define CTA_TIMEOUT_ICMP_MAX (__CTA_TIMEOUT_ICMP_MAX - 1) + +enum ctattr_timeout_dccp { + CTA_TIMEOUT_DCCP_UNSPEC, + CTA_TIMEOUT_DCCP_REQUEST, + CTA_TIMEOUT_DCCP_RESPOND, + CTA_TIMEOUT_DCCP_PARTOPEN, + CTA_TIMEOUT_DCCP_OPEN, + CTA_TIMEOUT_DCCP_CLOSEREQ, + CTA_TIMEOUT_DCCP_CLOSING, + CTA_TIMEOUT_DCCP_TIMEWAIT, + __CTA_TIMEOUT_DCCP_MAX +}; +#define CTA_TIMEOUT_DCCP_MAX (__CTA_TIMEOUT_DCCP_MAX - 1) + +enum ctattr_timeout_sctp { + CTA_TIMEOUT_SCTP_UNSPEC, + CTA_TIMEOUT_SCTP_CLOSED, + CTA_TIMEOUT_SCTP_COOKIE_WAIT, + CTA_TIMEOUT_SCTP_COOKIE_ECHOED, + CTA_TIMEOUT_SCTP_ESTABLISHED, + CTA_TIMEOUT_SCTP_SHUTDOWN_SENT, + CTA_TIMEOUT_SCTP_SHUTDOWN_RECD, + CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, + __CTA_TIMEOUT_SCTP_MAX +}; +#define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1) + +enum ctattr_timeout_icmpv6 { + CTA_TIMEOUT_ICMPV6_UNSPEC, + CTA_TIMEOUT_ICMPV6_TIMEOUT, + __CTA_TIMEOUT_ICMPV6_MAX +}; +#define CTA_TIMEOUT_ICMPV6_MAX (__CTA_TIMEOUT_ICMPV6_MAX - 1) + +enum ctattr_timeout_gre { + CTA_TIMEOUT_GRE_UNSPEC, + CTA_TIMEOUT_GRE_UNREPLIED, + CTA_TIMEOUT_GRE_REPLIED, + __CTA_TIMEOUT_GRE_MAX +}; +#define CTA_TIMEOUT_GRE_MAX (__CTA_TIMEOUT_GRE_MAX - 1) + +#define CTNL_TIMEOUT_NAME_MAX 32 + +#endif diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index c48b67405aa0..90c67c7db7e9 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -83,6 +83,17 @@ struct nf_conntrack_l4proto { size_t nla_size; +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + struct { + size_t obj_size; + int (*nlattr_to_obj)(struct nlattr *tb[], void *data); + int (*obj_to_nlattr)(struct sk_buff *skb, const void *data); + + unsigned int nlattr_max; + const struct nla_policy *nla_policy; + } ctnl_timeout; +#endif + #ifdef CONFIG_SYSCTL struct ctl_table_header **ctl_table_header; struct ctl_table *ctl_table; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 6b801124b31f..7cbe9cb261c2 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -269,6 +269,44 @@ static int icmp_nlattr_tuple_size(void) } #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + +#include +#include + +static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + unsigned int *timeout = data; + + if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) { + *timeout = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ; + } else { + /* Set default ICMP timeout. */ + *timeout = nf_ct_icmp_timeout; + } + return 0; +} + +static int +icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) +{ + const unsigned int *timeout = data; + + NLA_PUT_BE32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)); + + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static const struct nla_policy +icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = { + [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NLA_U32 }, +}; +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + #ifdef CONFIG_SYSCTL static struct ctl_table_header *icmp_sysctl_header; static struct ctl_table icmp_sysctl_table[] = { @@ -315,6 +353,15 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = .nlattr_to_tuple = icmp_nlattr_to_tuple, .nla_policy = icmp_nla_policy, #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = icmp_timeout_nlattr_to_obj, + .obj_to_nlattr = icmp_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_ICMP_MAX, + .obj_size = sizeof(unsigned int), + .nla_policy = icmp_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL .ctl_table_header = &icmp_sysctl_header, .ctl_table = icmp_sysctl_table, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 2eb9751eb7a8..92cc9f2931ae 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -276,6 +276,44 @@ static int icmpv6_nlattr_tuple_size(void) } #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + +#include +#include + +static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + unsigned int *timeout = data; + + if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) { + *timeout = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ; + } else { + /* Set default ICMPv6 timeout. */ + *timeout = nf_ct_icmpv6_timeout; + } + return 0; +} + +static int +icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) +{ + const unsigned int *timeout = data; + + NLA_PUT_BE32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ)); + + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static const struct nla_policy +icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = { + [CTA_TIMEOUT_ICMPV6_TIMEOUT] = { .type = NLA_U32 }, +}; +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + #ifdef CONFIG_SYSCTL static struct ctl_table_header *icmpv6_sysctl_header; static struct ctl_table icmpv6_sysctl_table[] = { @@ -308,6 +346,15 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .nlattr_to_tuple = icmpv6_nlattr_to_tuple, .nla_policy = icmpv6_nla_policy, #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = icmpv6_timeout_nlattr_to_obj, + .obj_to_nlattr = icmpv6_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_ICMP_MAX, + .obj_size = sizeof(unsigned int), + .nla_policy = icmpv6_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL .ctl_table_header = &icmpv6_sysctl_header, .ctl_table = icmpv6_sysctl_table, diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index b895d8b13215..f3efb6570dd9 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -314,6 +314,17 @@ config NF_CT_NETLINK help This option enables support for a netlink-based userspace interface +config NF_CT_NETLINK_TIMEOUT + tristate 'Connection tracking timeout tuning via Netlink' + select NETFILTER_NETLINK + depends on NETFILTER_ADVANCED + help + This option enables support for connection tracking timeout + fine-grain tuning. This allows you to attach specific timeout + policies to flows, instead of using the global timeout policy. + + If unsure, say `N'. + endif # NF_CONNTRACK # transparent proxy support diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index a28c2a6563f8..cf7cdd630bdd 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o # netlink interface for nf_conntrack obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o +obj-$(CONFIG_NF_CT_NETLINK_TIMEOUT) += nfnetlink_cttimeout.o # connection tracking helpers nf_conntrack_h323-objs := nf_conntrack_h323_main.o nf_conntrack_h323_asn1.o diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 8ea33598a0a7..24fdce256cb0 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -706,8 +706,60 @@ static int dccp_nlattr_size(void) return nla_total_size(0) /* CTA_PROTOINFO_DCCP */ + nla_policy_len(dccp_nla_policy, CTA_PROTOINFO_DCCP_MAX + 1); } + #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + +#include +#include + +static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + struct dccp_net *dn = dccp_pernet(&init_net); + unsigned int *timeouts = data; + int i; + + /* set default DCCP timeouts. */ + for (i=0; idccp_timeout[i]; + + /* there's a 1:1 mapping between attributes and protocol states. */ + for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i +#include + +static int generic_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + unsigned int *timeout = data; + + if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT]) + *timeout = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_GENERIC_TIMEOUT])) * HZ; + else { + /* Set default generic timeout. */ + *timeout = nf_ct_generic_timeout; + } + + return 0; +} + +static int +generic_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) +{ + const unsigned int *timeout = data; + + NLA_PUT_BE32(skb, CTA_TIMEOUT_GENERIC_TIMEOUT, htonl(*timeout / HZ)); + + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static const struct nla_policy +generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = { + [CTA_TIMEOUT_GENERIC_TIMEOUT] = { .type = NLA_U32 }, +}; +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + #ifdef CONFIG_SYSCTL static struct ctl_table_header *generic_sysctl_header; static struct ctl_table generic_sysctl_table[] = { @@ -102,6 +141,15 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = .packet = generic_packet, .get_timeouts = generic_get_timeouts, .new = generic_new, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = generic_timeout_nlattr_to_obj, + .obj_to_nlattr = generic_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_GENERIC_MAX, + .obj_size = sizeof(unsigned int), + .nla_policy = generic_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL .ctl_table_header = &generic_sysctl_header, .ctl_table = generic_sysctl_table, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 1bf01c95658b..659648c4b14a 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -292,6 +292,52 @@ static void gre_destroy(struct nf_conn *ct) nf_ct_gre_keymap_destroy(master); } +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + +#include +#include + +static int gre_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + unsigned int *timeouts = data; + + /* set default timeouts for GRE. */ + timeouts[GRE_CT_UNREPLIED] = gre_timeouts[GRE_CT_UNREPLIED]; + timeouts[GRE_CT_REPLIED] = gre_timeouts[GRE_CT_REPLIED]; + + if (tb[CTA_TIMEOUT_GRE_UNREPLIED]) { + timeouts[GRE_CT_UNREPLIED] = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_GRE_UNREPLIED])) * HZ; + } + if (tb[CTA_TIMEOUT_GRE_REPLIED]) { + timeouts[GRE_CT_REPLIED] = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_GRE_REPLIED])) * HZ; + } + return 0; +} + +static int +gre_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) +{ + const unsigned int *timeouts = data; + + NLA_PUT_BE32(skb, CTA_TIMEOUT_GRE_UNREPLIED, + htonl(timeouts[GRE_CT_UNREPLIED] / HZ)); + NLA_PUT_BE32(skb, CTA_TIMEOUT_GRE_REPLIED, + htonl(timeouts[GRE_CT_REPLIED] / HZ)); + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static const struct nla_policy +gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = { + [CTA_TIMEOUT_GRE_UNREPLIED] = { .type = NLA_U32 }, + [CTA_TIMEOUT_GRE_REPLIED] = { .type = NLA_U32 }, +}; +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + /* protocol helper struct */ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { .l3proto = AF_INET, @@ -312,6 +358,15 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = gre_timeout_nlattr_to_obj, + .obj_to_nlattr = gre_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_GRE_MAX, + .obj_size = sizeof(unsigned int) * GRE_CT_MAX, + .nla_policy = gre_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ }; static int proto_gre_net_init(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 2a0703371e24..72b5088592dc 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -549,6 +549,57 @@ static int sctp_nlattr_size(void) } #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + +#include +#include + +static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + unsigned int *timeouts = data; + int i; + + /* set default SCTP timeouts. */ + for (i=0; i +#include + +static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + unsigned int *timeouts = data; + int i; + + /* set default TCP timeouts. */ + for (i=0; i +#include + +static int udp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + unsigned int *timeouts = data; + + /* set default timeouts for UDP. */ + timeouts[UDP_CT_UNREPLIED] = udp_timeouts[UDP_CT_UNREPLIED]; + timeouts[UDP_CT_REPLIED] = udp_timeouts[UDP_CT_REPLIED]; + + if (tb[CTA_TIMEOUT_UDP_UNREPLIED]) { + timeouts[UDP_CT_UNREPLIED] = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDP_UNREPLIED])) * HZ; + } + if (tb[CTA_TIMEOUT_UDP_REPLIED]) { + timeouts[UDP_CT_REPLIED] = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDP_REPLIED])) * HZ; + } + return 0; +} + +static int +udp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) +{ + const unsigned int *timeouts = data; + + NLA_PUT_BE32(skb, CTA_TIMEOUT_UDP_UNREPLIED, + htonl(timeouts[UDP_CT_UNREPLIED] / HZ)); + NLA_PUT_BE32(skb, CTA_TIMEOUT_UDP_REPLIED, + htonl(timeouts[UDP_CT_REPLIED] / HZ)); + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static const struct nla_policy +udp_timeout_nla_policy[CTA_TIMEOUT_UDP_MAX+1] = { + [CTA_TIMEOUT_UDP_UNREPLIED] = { .type = NLA_U32 }, + [CTA_TIMEOUT_UDP_REPLIED] = { .type = NLA_U32 }, +}; +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + #ifdef CONFIG_SYSCTL static unsigned int udp_sysctl_table_users; static struct ctl_table_header *udp_sysctl_header; @@ -211,6 +257,15 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nla_policy = nf_ct_port_nla_policy, #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = udp_timeout_nlattr_to_obj, + .obj_to_nlattr = udp_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_UDP_MAX, + .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, + .nla_policy = udp_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL .ctl_table_users = &udp_sysctl_table_users, .ctl_table_header = &udp_sysctl_header, @@ -240,6 +295,15 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nla_policy = nf_ct_port_nla_policy, #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = udp_timeout_nlattr_to_obj, + .obj_to_nlattr = udp_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_UDP_MAX, + .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, + .nla_policy = udp_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL .ctl_table_users = &udp_sysctl_table_users, .ctl_table_header = &udp_sysctl_header, diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 0b32ccb1d515..e0606392cda0 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -156,6 +156,52 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, return NF_ACCEPT; } +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + +#include +#include + +static int udplite_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + unsigned int *timeouts = data; + + /* set default timeouts for UDPlite. */ + timeouts[UDPLITE_CT_UNREPLIED] = udplite_timeouts[UDPLITE_CT_UNREPLIED]; + timeouts[UDPLITE_CT_REPLIED] = udplite_timeouts[UDPLITE_CT_REPLIED]; + + if (tb[CTA_TIMEOUT_UDPLITE_UNREPLIED]) { + timeouts[UDPLITE_CT_UNREPLIED] = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDPLITE_UNREPLIED])) * HZ; + } + if (tb[CTA_TIMEOUT_UDPLITE_REPLIED]) { + timeouts[UDPLITE_CT_REPLIED] = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDPLITE_REPLIED])) * HZ; + } + return 0; +} + +static int +udplite_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) +{ + const unsigned int *timeouts = data; + + NLA_PUT_BE32(skb, CTA_TIMEOUT_UDPLITE_UNREPLIED, + htonl(timeouts[UDPLITE_CT_UNREPLIED] / HZ)); + NLA_PUT_BE32(skb, CTA_TIMEOUT_UDPLITE_REPLIED, + htonl(timeouts[UDPLITE_CT_REPLIED] / HZ)); + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static const struct nla_policy +udplite_timeout_nla_policy[CTA_TIMEOUT_UDPLITE_MAX+1] = { + [CTA_TIMEOUT_UDPLITE_UNREPLIED] = { .type = NLA_U32 }, + [CTA_TIMEOUT_UDPLITE_REPLIED] = { .type = NLA_U32 }, +}; +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + #ifdef CONFIG_SYSCTL static unsigned int udplite_sysctl_table_users; static struct ctl_table_header *udplite_sysctl_header; @@ -196,6 +242,16 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = udplite_timeout_nlattr_to_obj, + .obj_to_nlattr = udplite_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_UDPLITE_MAX, + .obj_size = sizeof(unsigned int) * + CTA_TIMEOUT_UDPLITE_MAX, + .nla_policy = udplite_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL .ctl_table_users = &udplite_sysctl_table_users, .ctl_table_header = &udplite_sysctl_header, @@ -221,6 +277,16 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = udplite_timeout_nlattr_to_obj, + .obj_to_nlattr = udplite_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_UDPLITE_MAX, + .obj_size = sizeof(unsigned int) * + CTA_TIMEOUT_UDPLITE_MAX, + .nla_policy = udplite_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL .ctl_table_users = &udplite_sysctl_table_users, .ctl_table_header = &udplite_sysctl_header, diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c new file mode 100644 index 000000000000..b860d5217189 --- /dev/null +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -0,0 +1,398 @@ +/* + * (C) 2012 by Pablo Neira Ayuso + * (C) 2012 by Vyatta Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation (or any later at your option). + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning"); + +struct ctnl_timeout { + struct list_head head; + struct rcu_head rcu_head; + atomic_t refcnt; + char name[CTNL_TIMEOUT_NAME_MAX]; + __u16 l3num; + __u8 l4num; + char data[0]; +}; + +static LIST_HEAD(cttimeout_list); + +static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { + [CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING }, + [CTA_TIMEOUT_L3PROTO] = { .type = NLA_U16 }, + [CTA_TIMEOUT_L4PROTO] = { .type = NLA_U8 }, + [CTA_TIMEOUT_DATA] = { .type = NLA_NESTED }, +}; + +static int +ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, + struct nf_conntrack_l4proto *l4proto, + const struct nlattr *attr) +{ + int ret = 0; + + if (likely(l4proto->ctnl_timeout.nlattr_to_obj)) { + struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1]; + + nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, + attr, l4proto->ctnl_timeout.nla_policy); + + ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, &timeout->data); + } + return ret; +} + +static int +cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + __u16 l3num; + __u8 l4num; + struct nf_conntrack_l4proto *l4proto; + struct ctnl_timeout *timeout, *matching = NULL; + char *name; + int ret; + + if (!cda[CTA_TIMEOUT_NAME] || + !cda[CTA_TIMEOUT_L3PROTO] || + !cda[CTA_TIMEOUT_L4PROTO] || + !cda[CTA_TIMEOUT_DATA]) + return -EINVAL; + + name = nla_data(cda[CTA_TIMEOUT_NAME]); + l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); + l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); + + list_for_each_entry(timeout, &cttimeout_list, head) { + if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) + continue; + + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + + matching = timeout; + break; + } + + l4proto = __nf_ct_l4proto_find(l3num, l4num); + + /* This protocol is not supportted, skip. */ + if (l4proto->l4proto != l4num) + return -EOPNOTSUPP; + + if (matching) { + if (nlh->nlmsg_flags & NLM_F_REPLACE) { + /* You cannot replace one timeout policy by another of + * different kind, sorry. + */ + if (matching->l3num != l3num || + matching->l4num != l4num) + return -EINVAL; + + ret = ctnl_timeout_parse_policy(matching, l4proto, + cda[CTA_TIMEOUT_DATA]); + return ret; + } + return -EBUSY; + } + + timeout = kzalloc(sizeof(struct ctnl_timeout) + + l4proto->ctnl_timeout.obj_size, GFP_KERNEL); + if (timeout == NULL) + return -ENOMEM; + + ret = ctnl_timeout_parse_policy(timeout, l4proto, + cda[CTA_TIMEOUT_DATA]); + if (ret < 0) + goto err; + + strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME])); + timeout->l3num = l3num; + timeout->l4num = l4num; + atomic_set(&timeout->refcnt, 1); + list_add_tail_rcu(&timeout->head, &cttimeout_list); + + return 0; +err: + kfree(timeout); + return ret; +} + +static int +ctnl_timeout_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, + int event, struct ctnl_timeout *timeout) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = pid ? NLM_F_MULTI : 0; + struct nf_conntrack_l4proto *l4proto; + + event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8; + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + NLA_PUT_STRING(skb, CTA_TIMEOUT_NAME, timeout->name); + NLA_PUT_BE16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)); + NLA_PUT_U8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4num); + NLA_PUT_BE32(skb, CTA_TIMEOUT_USE, + htonl(atomic_read(&timeout->refcnt))); + + l4proto = __nf_ct_l4proto_find(timeout->l3num, timeout->l4num); + + /* If the timeout object does not match the layer 4 protocol tracker, + * then skip dumping the data part since we don't know how to + * interpret it. This may happen for UPDlite, SCTP and DCCP since + * you can unload the module. + */ + if (timeout->l4num != l4proto->l4proto) + goto out; + + if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) { + struct nlattr *nest_parms; + int ret; + + nest_parms = nla_nest_start(skb, + CTA_TIMEOUT_DATA | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + + ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->data); + if (ret < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms); + } +out: + nlmsg_end(skb, nlh); + return skb->len; + +nlmsg_failure: +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int +ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ctnl_timeout *cur, *last; + + if (cb->args[2]) + return 0; + + last = (struct ctnl_timeout *)cb->args[1]; + if (cb->args[1]) + cb->args[1] = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(cur, &cttimeout_list, head) { + if (last && cur != last) + continue; + + if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + NFNL_MSG_TYPE(cb->nlh->nlmsg_type), + IPCTNL_MSG_TIMEOUT_NEW, cur) < 0) { + cb->args[1] = (unsigned long)cur; + break; + } + } + if (!cb->args[1]) + cb->args[2] = 1; + rcu_read_unlock(); + return skb->len; +} + +static int +cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + int ret = -ENOENT; + char *name; + struct ctnl_timeout *cur; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = ctnl_timeout_dump, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } + + if (!cda[CTA_TIMEOUT_NAME]) + return -EINVAL; + name = nla_data(cda[CTA_TIMEOUT_NAME]); + + list_for_each_entry(cur, &cttimeout_list, head) { + struct sk_buff *skb2; + + if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) + continue; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { + ret = -ENOMEM; + break; + } + + ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).pid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + IPCTNL_MSG_TIMEOUT_NEW, cur); + if (ret <= 0) { + kfree_skb(skb2); + break; + } + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, + MSG_DONTWAIT); + if (ret > 0) + ret = 0; + + /* this avoids a loop in nfnetlink. */ + return ret == -EAGAIN ? -ENOBUFS : ret; + } + return ret; +} + +/* try to delete object, fail if it is still in use. */ +static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) +{ + int ret = 0; + + /* we want to avoid races with nf_ct_timeout_find_get. */ + if (atomic_dec_and_test(&timeout->refcnt)) { + /* We are protected by nfnl mutex. */ + list_del_rcu(&timeout->head); + kfree_rcu(timeout, rcu_head); + } else { + /* still in use, restore reference counter. */ + atomic_inc(&timeout->refcnt); + ret = -EBUSY; + } + return ret; +} + +static int +cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + char *name; + struct ctnl_timeout *cur; + int ret = -ENOENT; + + if (!cda[CTA_TIMEOUT_NAME]) { + list_for_each_entry(cur, &cttimeout_list, head) + ctnl_timeout_try_del(cur); + + return 0; + } + name = nla_data(cda[CTA_TIMEOUT_NAME]); + + list_for_each_entry(cur, &cttimeout_list, head) { + if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) + continue; + + ret = ctnl_timeout_try_del(cur); + if (ret < 0) + return ret; + + break; + } + return ret; +} + +static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = { + [IPCTNL_MSG_TIMEOUT_NEW] = { .call = cttimeout_new_timeout, + .attr_count = CTA_TIMEOUT_MAX, + .policy = cttimeout_nla_policy }, + [IPCTNL_MSG_TIMEOUT_GET] = { .call = cttimeout_get_timeout, + .attr_count = CTA_TIMEOUT_MAX, + .policy = cttimeout_nla_policy }, + [IPCTNL_MSG_TIMEOUT_DELETE] = { .call = cttimeout_del_timeout, + .attr_count = CTA_TIMEOUT_MAX, + .policy = cttimeout_nla_policy }, +}; + +static const struct nfnetlink_subsystem cttimeout_subsys = { + .name = "conntrack_timeout", + .subsys_id = NFNL_SUBSYS_CTNETLINK_TIMEOUT, + .cb_count = IPCTNL_MSG_TIMEOUT_MAX, + .cb = cttimeout_cb, +}; + +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT); + +static int __init cttimeout_init(void) +{ + int ret; + + ret = nfnetlink_subsys_register(&cttimeout_subsys); + if (ret < 0) { + pr_err("cttimeout_init: cannot register cttimeout with " + "nfnetlink.\n"); + goto err_out; + } + return 0; + +err_out: + return ret; +} + +static void __exit cttimeout_exit(void) +{ + struct ctnl_timeout *cur, *tmp; + + pr_info("cttimeout: unregistering from nfnetlink.\n"); + + nfnetlink_subsys_unregister(&cttimeout_subsys); + list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) { + list_del_rcu(&cur->head); + /* We are sure that our objects have no clients at this point, + * it's safe to release them all without checking refcnt. + */ + kfree_rcu(cur, rcu_head); + } +} + +module_init(cttimeout_init); +module_exit(cttimeout_exit); -- cgit v1.2.3 From b4fb05ea402cb6930b40d3152d8acabc391b23e2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Mar 2012 04:45:43 +0000 Subject: tcp: md5: correct a RCU lockdep splat commit a8afca0329 (tcp: md5: protects md5sig_info with RCU) added a lockdep splat in tcp_md5_do_lookup() in case a timer fires a tcp retransmit. At this point, socket lock is owned by the sofirq handler, not the user, so we should adjust a bit the lockdep condition, as we dont hold rcu_read_lock(). Signed-off-by: Eric Dumazet Reported-by: Valdis Kletnieks Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 94abee8cf563..507924b640ef 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -927,7 +927,8 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, /* caller either holds rcu_read_lock() or socket lock */ md5sig = rcu_dereference_check(tp->md5sig_info, - sock_owned_by_user(sk)); + sock_owned_by_user(sk) || + lockdep_is_held(&sk->sk_lock.slock)); if (!md5sig) return NULL; #if IS_ENABLED(CONFIG_IPV6) -- cgit v1.2.3 From ba57b4db2624793c6eb8f2c051c9f7b8a6e7b6a6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 7 Mar 2012 20:45:32 -0500 Subject: ipv4: Make ip_call_ra_chain() return bool. Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- net/ipv4/ip_input.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/ip.h b/include/net/ip.h index 775009f9eaba..b53d65f24f7b 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -388,7 +388,7 @@ static inline int sk_mc_loop(struct sock *sk) return 1; } -extern int ip_call_ra_chain(struct sk_buff *skb); +extern bool ip_call_ra_chain(struct sk_buff *skb); /* * Functions provided by ip_fragment.c diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 073a9b01c40c..70afe5bf1945 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -148,7 +148,7 @@ /* * Process Router Attention IP option (RFC 2113) */ -int ip_call_ra_chain(struct sk_buff *skb) +bool ip_call_ra_chain(struct sk_buff *skb) { struct ip_ra_chain *ra; u8 protocol = ip_hdr(skb)->protocol; @@ -167,7 +167,7 @@ int ip_call_ra_chain(struct sk_buff *skb) net_eq(sock_net(sk), dev_net(dev))) { if (ip_is_fragment(ip_hdr(skb))) { if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) - return 1; + return true; } if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -180,9 +180,9 @@ int ip_call_ra_chain(struct sk_buff *skb) if (last) { raw_rcv(last, skb); - return 1; + return true; } - return 0; + return false; } static int ip_local_deliver_finish(struct sk_buff *skb) -- cgit v1.2.3 From 6a91395f20119d696330e2604451614a64369d1b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 7 Mar 2012 20:48:08 -0500 Subject: ipv4: Make ip_rcv_options() return bool. Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 70afe5bf1945..bdaa2c5c28e4 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -265,7 +265,7 @@ int ip_local_deliver(struct sk_buff *skb) ip_local_deliver_finish); } -static inline int ip_rcv_options(struct sk_buff *skb) +static inline bool ip_rcv_options(struct sk_buff *skb) { struct ip_options *opt; const struct iphdr *iph; @@ -309,9 +309,9 @@ static inline int ip_rcv_options(struct sk_buff *skb) goto drop; } - return 0; + return false; drop: - return -1; + return true; } static int ip_rcv_finish(struct sk_buff *skb) -- cgit v1.2.3 From 058bd4d2a4ff0aaa4a5381c67e776729d840c785 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 11 Mar 2012 18:36:11 +0000 Subject: net: Convert printks to pr_ Use a more current kernel messaging style. Convert a printk block to print_hex_dump. Coalesce formats, align arguments. Use %s, __func__ instead of embedding function names. Some messages that were prefixed with _close are now prefixed with _fini. Some ah4 and esp messages are now not prefixed with "ip ". The intent of this patch is to later add something like #define pr_fmt(fmt) "IPv4: " fmt. to standardize the output messages. Text size is trivially reduced. (x86-32 allyesconfig) $ size net/ipv4/built-in.o* text data bss dec hex filename 887888 31558 249696 1169142 11d6f6 net/ipv4/built-in.o.new 887934 31558 249800 1169292 11d78c net/ipv4/built-in.o.old Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 25 +++++------ net/ipv4/ah4.c | 14 +++--- net/ipv4/esp4.c | 8 ++-- net/ipv4/fib_frontend.c | 6 +-- net/ipv4/fib_semantics.c | 2 +- net/ipv4/fib_trie.c | 5 +-- net/ipv4/icmp.c | 19 ++++---- net/ipv4/ip_fragment.c | 3 +- net/ipv4/ip_gre.c | 6 +-- net/ipv4/ip_input.c | 4 +- net/ipv4/ip_options.c | 2 +- net/ipv4/ipcomp.c | 8 ++-- net/ipv4/ipconfig.c | 110 +++++++++++++++++++++++------------------------ net/ipv4/ipip.c | 4 +- net/ipv4/ipmr.c | 4 +- net/ipv4/ping.c | 18 ++++---- net/ipv4/raw.c | 7 +-- net/ipv4/route.c | 43 ++++++++---------- net/ipv4/tcp.c | 7 ++- net/ipv4/tcp_cong.c | 7 ++- net/ipv4/tcp_input.c | 8 ++-- net/ipv4/tcp_ipv4.c | 8 ++-- net/ipv4/tunnel4.c | 8 ++-- net/ipv4/udplite.c | 4 +- net/ipv4/xfrm4_tunnel.c | 14 +++--- 25 files changed, 163 insertions(+), 181 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e588a34e85c2..b7a946611f2f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1085,13 +1085,11 @@ out: return; out_permanent: - printk(KERN_ERR "Attempt to override permanent protocol %d.\n", - protocol); + pr_err("Attempt to override permanent protocol %d\n", protocol); goto out; out_illegal: - printk(KERN_ERR - "Ignoring attempt to register invalid socket type %d.\n", + pr_err("Ignoring attempt to register invalid socket type %d\n", p->type); goto out; } @@ -1100,8 +1098,7 @@ EXPORT_SYMBOL(inet_register_protosw); void inet_unregister_protosw(struct inet_protosw *p) { if (INET_PROTOSW_PERMANENT & p->flags) { - printk(KERN_ERR - "Attempt to unregister permanent protocol %d.\n", + pr_err("Attempt to unregister permanent protocol %d\n", p->protocol); } else { spin_lock_bh(&inetsw_lock); @@ -1150,8 +1147,8 @@ static int inet_sk_reselect_saddr(struct sock *sk) return 0; if (sysctl_ip_dynaddr > 1) { - printk(KERN_INFO "%s(): shifting inet->saddr from %pI4 to %pI4\n", - __func__, &old_saddr, &new_saddr); + pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n", + __func__, &old_saddr, &new_saddr); } inet->inet_saddr = inet->inet_rcv_saddr = new_saddr; @@ -1680,14 +1677,14 @@ static int __init inet_init(void) */ if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0) - printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n"); + pr_crit("%s: Cannot add ICMP protocol\n", __func__); if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0) - printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n"); + pr_crit("%s: Cannot add UDP protocol\n", __func__); if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0) - printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n"); + pr_crit("%s: Cannot add TCP protocol\n", __func__); #ifdef CONFIG_IP_MULTICAST if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0) - printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n"); + pr_crit("%s: Cannot add IGMP protocol\n", __func__); #endif /* Register the socket-side information for inet_create. */ @@ -1734,14 +1731,14 @@ static int __init inet_init(void) */ #if defined(CONFIG_IP_MROUTE) if (ip_mr_init()) - printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n"); + pr_crit("%s: Cannot init ipv4 mroute\n", __func__); #endif /* * Initialise per-cpu ipv4 mibs */ if (init_ipv4_mibs()) - printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); + pr_crit("%s: Cannot init ipv4 mibs\n", __func__); ipv4_proc_init(); diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 36d14406261e..b5524660c81d 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -445,9 +445,9 @@ static int ah_init_state(struct xfrm_state *x) if (aalg_desc->uinfo.auth.icv_fullbits/8 != crypto_ahash_digestsize(ahash)) { - printk(KERN_INFO "AH: %s digestsize %u != %hu\n", - x->aalg->alg_name, crypto_ahash_digestsize(ahash), - aalg_desc->uinfo.auth.icv_fullbits/8); + pr_info("AH: %s digestsize %u != %hu\n", + x->aalg->alg_name, crypto_ahash_digestsize(ahash), + aalg_desc->uinfo.auth.icv_fullbits/8); goto error; } @@ -510,11 +510,11 @@ static const struct net_protocol ah4_protocol = { static int __init ah4_init(void) { if (xfrm_register_type(&ah_type, AF_INET) < 0) { - printk(KERN_INFO "ip ah init: can't add xfrm type\n"); + pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } if (inet_add_protocol(&ah4_protocol, IPPROTO_AH) < 0) { - printk(KERN_INFO "ip ah init: can't add protocol\n"); + pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ah_type, AF_INET); return -EAGAIN; } @@ -524,9 +524,9 @@ static int __init ah4_init(void) static void __exit ah4_fini(void) { if (inet_del_protocol(&ah4_protocol, IPPROTO_AH) < 0) - printk(KERN_INFO "ip ah close: can't remove protocol\n"); + pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&ah_type, AF_INET) < 0) - printk(KERN_INFO "ip ah close: can't remove xfrm type\n"); + pr_info("%s: can't remove xfrm type\n", __func__); } module_init(ah4_init); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index a5b413416da3..0a86dbe9454b 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -706,11 +706,11 @@ static const struct net_protocol esp4_protocol = { static int __init esp4_init(void) { if (xfrm_register_type(&esp_type, AF_INET) < 0) { - printk(KERN_INFO "ip esp init: can't add xfrm type\n"); + pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } if (inet_add_protocol(&esp4_protocol, IPPROTO_ESP) < 0) { - printk(KERN_INFO "ip esp init: can't add protocol\n"); + pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&esp_type, AF_INET); return -EAGAIN; } @@ -720,9 +720,9 @@ static int __init esp4_init(void) static void __exit esp4_fini(void) { if (inet_del_protocol(&esp4_protocol, IPPROTO_ESP) < 0) - printk(KERN_INFO "ip esp close: can't remove protocol\n"); + pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&esp_type, AF_INET) < 0) - printk(KERN_INFO "ip esp close: can't remove xfrm type\n"); + pr_info("%s: can't remove xfrm type\n", __func__); } module_init(esp4_init); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 92fc5f69f5da..76e72bacc217 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -695,7 +695,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) if (ifa->ifa_flags & IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, prefix, mask); if (prim == NULL) { - printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n"); + pr_warn("%s: bug: prim == NULL\n", __func__); return; } } @@ -749,11 +749,11 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) if (ifa->ifa_flags & IFA_F_SECONDARY) { prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); if (prim == NULL) { - printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); + pr_warn("%s: bug: prim == NULL\n", __func__); return; } if (iprim && iprim != prim) { - printk(KERN_WARNING "fib_del_ifaddr: bug: iprim != prim\n"); + pr_warn("%s: bug: iprim != prim\n", __func__); return; } } else if (!ipv4_is_zeronet(any) && diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 80106d89d548..a8c5c1d6715b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -154,7 +154,7 @@ static void free_fib_info_rcu(struct rcu_head *head) void free_fib_info(struct fib_info *fi) { if (fi->fib_dead == 0) { - pr_warning("Freeing alive fib_info %p\n", fi); + pr_warn("Freeing alive fib_info %p\n", fi); return; } change_nexthops(fi) { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2b555a5521e0..da9b9cb2282d 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1170,9 +1170,8 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) } if (tp && tp->pos + tp->bits > 32) - pr_warning("fib_trie" - " tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", - tp, tp->pos, tp->bits, key, plen); + pr_warn("fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", + tp, tp->pos, tp->bits, key, plen); /* Rebalance the trie */ diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index ab188ae12fd9..5c7323b7810f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -670,7 +670,7 @@ static void icmp_unreach(struct sk_buff *skb) break; case ICMP_FRAG_NEEDED: if (ipv4_config.no_pmtu_disc) { - LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: fragmentation needed and DF set.\n", + LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: fragmentation needed and DF set\n", &iph->daddr); } else { info = ip_rt_frag_needed(net, iph, @@ -681,7 +681,7 @@ static void icmp_unreach(struct sk_buff *skb) } break; case ICMP_SR_FAILED: - LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: Source Route Failed.\n", + LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: Source Route Failed\n", &iph->daddr); break; default: @@ -713,13 +713,10 @@ static void icmp_unreach(struct sk_buff *skb) if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { if (net_ratelimit()) - printk(KERN_WARNING "%pI4 sent an invalid ICMP " - "type %u, code %u " - "error to a broadcast: %pI4 on %s\n", - &ip_hdr(skb)->saddr, - icmph->type, icmph->code, - &iph->daddr, - skb->dev->name); + pr_warn("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n", + &ip_hdr(skb)->saddr, + icmph->type, icmph->code, + &iph->daddr, skb->dev->name); goto out; } @@ -946,8 +943,8 @@ static void icmp_address_reply(struct sk_buff *skb) break; } if (!ifa && net_ratelimit()) { - printk(KERN_INFO "Wrong address mask %pI4 from %s/%pI4\n", - mp, dev->name, &ip_hdr(skb)->saddr); + pr_info("Wrong address mask %pI4 from %s/%pI4\n", + mp, dev->name, &ip_hdr(skb)->saddr); } } } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1f23a57aa9e6..b2fd333abd59 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -643,8 +643,7 @@ out_nomem: goto out_fail; out_oversize: if (net_ratelimit()) - printk(KERN_INFO "Oversized IP packet from %pI4.\n", - &qp->saddr); + pr_info("Oversized IP packet from %pI4\n", &qp->saddr); out_fail: IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); return err; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 6ef66af12291..185e0a75ecec 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1716,7 +1716,7 @@ static int __init ipgre_init(void) { int err; - printk(KERN_INFO "GRE over IPv4 tunneling driver\n"); + pr_info("GRE over IPv4 tunneling driver\n"); err = register_pernet_device(&ipgre_net_ops); if (err < 0) @@ -1724,7 +1724,7 @@ static int __init ipgre_init(void) err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); if (err < 0) { - printk(KERN_INFO "ipgre init: can't add protocol\n"); + pr_info("%s: can't add protocol\n", __func__); goto add_proto_failed; } @@ -1753,7 +1753,7 @@ static void __exit ipgre_fini(void) rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) - printk(KERN_INFO "ipgre close: can't remove protocol\n"); + pr_info("%s: can't remove protocol\n", __func__); unregister_pernet_device(&ipgre_net_ops); } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index bdaa2c5c28e4..de8e9be4aac8 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -299,8 +299,8 @@ static inline bool ip_rcv_options(struct sk_buff *skb) if (!IN_DEV_SOURCE_ROUTE(in_dev)) { if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) - printk(KERN_INFO "source route option %pI4 -> %pI4\n", - &iph->saddr, &iph->daddr); + pr_info("source route option %pI4 -> %pI4\n", + &iph->saddr, &iph->daddr); goto drop; } } diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 42dd1a90edea..e25ee7c8bdde 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -577,7 +577,7 @@ void ip_forward_options(struct sk_buff *skb) ip_rt_get_source(&optptr[srrptr-1], skb, rt); optptr[2] = srrptr+4; } else if (net_ratelimit()) - printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n"); + pr_crit("%s(): Argh! Destination lost!\n", __func__); if (opt->ts_needaddr) { optptr = raw + opt->ts; ip_rt_get_source(&optptr[optptr[2]-9], skb, rt); diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index c857f6f49b03..63b64c45a826 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -156,11 +156,11 @@ static const struct net_protocol ipcomp4_protocol = { static int __init ipcomp4_init(void) { if (xfrm_register_type(&ipcomp_type, AF_INET) < 0) { - printk(KERN_INFO "ipcomp init: can't add xfrm type\n"); + pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } if (inet_add_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0) { - printk(KERN_INFO "ipcomp init: can't add protocol\n"); + pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ipcomp_type, AF_INET); return -EAGAIN; } @@ -170,9 +170,9 @@ static int __init ipcomp4_init(void) static void __exit ipcomp4_fini(void) { if (inet_del_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0) - printk(KERN_INFO "ip ipcomp close: can't remove protocol\n"); + pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&ipcomp_type, AF_INET) < 0) - printk(KERN_INFO "ip ipcomp close: can't remove xfrm type\n"); + pr_info("%s: can't remove xfrm type\n", __func__); } module_init(ipcomp4_init); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 6e412a60a91f..92ac7e7363a0 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -214,7 +214,7 @@ static int __init ic_open_devs(void) if (!(dev->flags & IFF_LOOPBACK)) continue; if (dev_change_flags(dev, dev->flags | IFF_UP) < 0) - printk(KERN_ERR "IP-Config: Failed to open %s\n", dev->name); + pr_err("IP-Config: Failed to open %s\n", dev->name); } for_each_netdev(&init_net, dev) { @@ -223,7 +223,8 @@ static int __init ic_open_devs(void) if (dev->mtu >= 364) able |= IC_BOOTP; else - printk(KERN_WARNING "DHCP/BOOTP: Ignoring device %s, MTU %d too small", dev->name, dev->mtu); + pr_warn("DHCP/BOOTP: Ignoring device %s, MTU %d too small", + dev->name, dev->mtu); if (!(dev->flags & IFF_NOARP)) able |= IC_RARP; able &= ic_proto_enabled; @@ -231,7 +232,8 @@ static int __init ic_open_devs(void) continue; oflags = dev->flags; if (dev_change_flags(dev, oflags | IFF_UP) < 0) { - printk(KERN_ERR "IP-Config: Failed to open %s\n", dev->name); + pr_err("IP-Config: Failed to open %s\n", + dev->name); continue; } if (!(d = kmalloc(sizeof(struct ic_device), GFP_KERNEL))) { @@ -273,9 +275,10 @@ have_carrier: if (!ic_first_dev) { if (user_dev_name[0]) - printk(KERN_ERR "IP-Config: Device `%s' not found.\n", user_dev_name); + pr_err("IP-Config: Device `%s' not found\n", + user_dev_name); else - printk(KERN_ERR "IP-Config: No network devices available.\n"); + pr_err("IP-Config: No network devices available\n"); return -ENODEV; } return 0; @@ -359,17 +362,20 @@ static int __init ic_setup_if(void) strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name); set_sockaddr(sin, ic_myaddr, 0); if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) { - printk(KERN_ERR "IP-Config: Unable to set interface address (%d).\n", err); + pr_err("IP-Config: Unable to set interface address (%d)\n", + err); return -1; } set_sockaddr(sin, ic_netmask, 0); if ((err = ic_devinet_ioctl(SIOCSIFNETMASK, &ir)) < 0) { - printk(KERN_ERR "IP-Config: Unable to set interface netmask (%d).\n", err); + pr_err("IP-Config: Unable to set interface netmask (%d)\n", + err); return -1; } set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0); if ((err = ic_devinet_ioctl(SIOCSIFBRDADDR, &ir)) < 0) { - printk(KERN_ERR "IP-Config: Unable to set interface broadcast address (%d).\n", err); + pr_err("IP-Config: Unable to set interface broadcast address (%d)\n", + err); return -1; } /* Handle the case where we need non-standard MTU on the boot link (a network @@ -380,8 +386,8 @@ static int __init ic_setup_if(void) strcpy(ir.ifr_name, ic_dev->name); ir.ifr_mtu = ic_dev_mtu; if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0) - printk(KERN_ERR "IP-Config: Unable to set interface mtu to %d (%d).\n", - ic_dev_mtu, err); + pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n", + ic_dev_mtu, err); } return 0; } @@ -396,7 +402,7 @@ static int __init ic_setup_routes(void) memset(&rm, 0, sizeof(rm)); if ((ic_gateway ^ ic_myaddr) & ic_netmask) { - printk(KERN_ERR "IP-Config: Gateway not on directly connected network.\n"); + pr_err("IP-Config: Gateway not on directly connected network\n"); return -1; } set_sockaddr((struct sockaddr_in *) &rm.rt_dst, 0, 0); @@ -404,7 +410,8 @@ static int __init ic_setup_routes(void) set_sockaddr((struct sockaddr_in *) &rm.rt_gateway, ic_gateway, 0); rm.rt_flags = RTF_UP | RTF_GATEWAY; if ((err = ic_route_ioctl(SIOCADDRT, &rm)) < 0) { - printk(KERN_ERR "IP-Config: Cannot add default route (%d).\n", err); + pr_err("IP-Config: Cannot add default route (%d)\n", + err); return -1; } } @@ -437,8 +444,8 @@ static int __init ic_defaults(void) else if (IN_CLASSC(ntohl(ic_myaddr))) ic_netmask = htonl(IN_CLASSC_NET); else { - printk(KERN_ERR "IP-Config: Unable to guess netmask for address %pI4\n", - &ic_myaddr); + pr_err("IP-Config: Unable to guess netmask for address %pI4\n", + &ic_myaddr); return -1; } printk("IP-Config: Guessing netmask %pI4\n", &ic_netmask); @@ -688,8 +695,8 @@ ic_dhcp_init_options(u8 *options) e += len; } if (*vendor_class_identifier) { - printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n", - vendor_class_identifier); + pr_info("DHCP: sending class identifier \"%s\"\n", + vendor_class_identifier); *e++ = 60; /* Class-identifier */ len = strlen(vendor_class_identifier); *e++ = len; @@ -949,8 +956,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str /* Fragments are not supported */ if (ip_is_fragment(h)) { if (net_ratelimit()) - printk(KERN_ERR "DHCP/BOOTP: Ignoring fragmented " - "reply.\n"); + pr_err("DHCP/BOOTP: Ignoring fragmented reply\n"); goto drop; } @@ -999,8 +1005,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str if (b->op != BOOTP_REPLY || b->xid != d->xid) { if (net_ratelimit()) - printk(KERN_ERR "DHCP/BOOTP: Reply not for us, " - "op[%x] xid[%x]\n", + pr_err("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n", b->op, b->xid); goto drop_unlock; } @@ -1008,7 +1013,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str /* Is it a reply for the device we are configuring? */ if (b->xid != ic_dev_xid) { if (net_ratelimit()) - printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet\n"); + pr_err("DHCP/BOOTP: Ignoring delayed packet\n"); goto drop_unlock; } @@ -1146,17 +1151,17 @@ static int __init ic_dynamic(void) * are missing, and without DHCP/BOOTP/RARP we are unable to get it. */ if (!ic_proto_enabled) { - printk(KERN_ERR "IP-Config: Incomplete network configuration information.\n"); + pr_err("IP-Config: Incomplete network configuration information\n"); return -1; } #ifdef IPCONFIG_BOOTP if ((ic_proto_enabled ^ ic_proto_have_if) & IC_BOOTP) - printk(KERN_ERR "DHCP/BOOTP: No suitable device found.\n"); + pr_err("DHCP/BOOTP: No suitable device found\n"); #endif #ifdef IPCONFIG_RARP if ((ic_proto_enabled ^ ic_proto_have_if) & IC_RARP) - printk(KERN_ERR "RARP: No suitable device found.\n"); + pr_err("RARP: No suitable device found\n"); #endif if (!ic_proto_have_if) @@ -1183,11 +1188,11 @@ static int __init ic_dynamic(void) * [Actually we could now, but the nothing else running note still * applies.. - AC] */ - printk(KERN_NOTICE "Sending %s%s%s requests .", - do_bootp - ? ((ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP") : "", - (do_bootp && do_rarp) ? " and " : "", - do_rarp ? "RARP" : ""); + pr_notice("Sending %s%s%s requests .", + do_bootp + ? ((ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP") : "", + (do_bootp && do_rarp) ? " and " : "", + do_rarp ? "RARP" : ""); start_jiffies = jiffies; d = ic_first_dev; @@ -1216,13 +1221,13 @@ static int __init ic_dynamic(void) (ic_proto_enabled & IC_USE_DHCP) && ic_dhcp_msgtype != DHCPACK) { ic_got_reply = 0; - printk(KERN_CONT ","); + pr_cont(","); continue; } #endif /* IPCONFIG_DHCP */ if (ic_got_reply) { - printk(KERN_CONT " OK\n"); + pr_cont(" OK\n"); break; } @@ -1230,7 +1235,7 @@ static int __init ic_dynamic(void) continue; if (! --retries) { - printk(KERN_CONT " timed out!\n"); + pr_cont(" timed out!\n"); break; } @@ -1240,7 +1245,7 @@ static int __init ic_dynamic(void) if (timeout > CONF_TIMEOUT_MAX) timeout = CONF_TIMEOUT_MAX; - printk(KERN_CONT "."); + pr_cont("."); } #ifdef IPCONFIG_BOOTP @@ -1260,8 +1265,8 @@ static int __init ic_dynamic(void) printk("IP-Config: Got %s answer from %pI4, ", ((ic_got_reply & IC_RARP) ? "RARP" : (ic_proto_enabled & IC_USE_DHCP) ? "DHCP" : "BOOTP"), - &ic_servaddr); - printk(KERN_CONT "my address is %pI4\n", &ic_myaddr); + &ic_servaddr); + pr_cont("my address is %pI4\n", &ic_myaddr); return 0; } @@ -1437,24 +1442,22 @@ static int __init ip_auto_config(void) */ #ifdef CONFIG_ROOT_NFS if (ROOT_DEV == Root_NFS) { - printk(KERN_ERR - "IP-Config: Retrying forever (NFS root)...\n"); + pr_err("IP-Config: Retrying forever (NFS root)...\n"); goto try_try_again; } #endif if (--retries) { - printk(KERN_ERR - "IP-Config: Reopening network devices...\n"); + pr_err("IP-Config: Reopening network devices...\n"); goto try_try_again; } /* Oh, well. At least we tried. */ - printk(KERN_ERR "IP-Config: Auto-configuration of network failed.\n"); + pr_err("IP-Config: Auto-configuration of network failed\n"); return -1; } #else /* !DYNAMIC */ - printk(KERN_ERR "IP-Config: Incomplete network configuration information.\n"); + pr_err("IP-Config: Incomplete network configuration information\n"); ic_close_devs(); return -1; #endif /* IPCONFIG_DYNAMIC */ @@ -1492,19 +1495,16 @@ static int __init ip_auto_config(void) /* * Clue in the operator. */ - printk("IP-Config: Complete:\n"); - printk(" device=%s", ic_dev->name); - printk(KERN_CONT ", addr=%pI4", &ic_myaddr); - printk(KERN_CONT ", mask=%pI4", &ic_netmask); - printk(KERN_CONT ", gw=%pI4", &ic_gateway); - printk(KERN_CONT ",\n host=%s, domain=%s, nis-domain=%s", - utsname()->nodename, ic_domain, utsname()->domainname); - printk(KERN_CONT ",\n bootserver=%pI4", &ic_servaddr); - printk(KERN_CONT ", rootserver=%pI4", &root_server_addr); - printk(KERN_CONT ", rootpath=%s", root_server_path); + pr_info("IP-Config: Complete:\n"); + pr_info(" device=%s, addr=%pI4, mask=%pI4, gw=%pI4\n", + ic_dev->name, &ic_myaddr, &ic_netmask, &ic_gateway); + pr_info(" host=%s, domain=%s, nis-domain=%s\n", + utsname()->nodename, ic_domain, utsname()->domainname); + pr_info(" bootserver=%pI4, rootserver=%pI4, rootpath=%s", + &ic_servaddr, &root_server_addr, root_server_path); if (ic_dev_mtu) - printk(KERN_CONT ", mtu=%d", ic_dev_mtu); - printk(KERN_CONT "\n"); + pr_cont(", mtu=%d", ic_dev_mtu); + pr_cont("\n"); #endif /* !SILENT */ return 0; @@ -1637,8 +1637,8 @@ static int __init vendor_class_identifier_setup(char *addrs) if (strlcpy(vendor_class_identifier, addrs, sizeof(vendor_class_identifier)) >= sizeof(vendor_class_identifier)) - printk(KERN_WARNING "DHCP: vendorclass too long, truncated to \"%s\"", - vendor_class_identifier); + pr_warn("DHCP: vendorclass too long, truncated to \"%s\"", + vendor_class_identifier); return 1; } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index f84ebff5cdb0..ae1413e3f2f8 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -892,7 +892,7 @@ static int __init ipip_init(void) err = xfrm4_tunnel_register(&ipip_handler, AF_INET); if (err < 0) { unregister_pernet_device(&ipip_net_ops); - printk(KERN_INFO "ipip init: can't register tunnel\n"); + pr_info("%s: can't register tunnel\n", __func__); } return err; } @@ -900,7 +900,7 @@ static int __init ipip_init(void) static void __exit ipip_fini(void) { if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) - printk(KERN_INFO "ipip close: can't deregister tunnel\n"); + pr_info("%s: can't deregister tunnel\n", __func__); unregister_pernet_device(&ipip_net_ops); } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7bc2db6db8d4..0518a4fb177b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -951,7 +951,7 @@ static int ipmr_cache_report(struct mr_table *mrt, rcu_read_unlock(); if (ret < 0) { if (net_ratelimit()) - printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); + pr_warn("mroute: pending queue full, dropping entries\n"); kfree_skb(skb); } @@ -2538,7 +2538,7 @@ int __init ip_mr_init(void) goto reg_notif_fail; #ifdef CONFIG_IP_PIMSM_V2 if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) { - printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n"); + pr_err("%s: can't add PIM protocol\n", __func__); err = -EAGAIN; goto add_proto_fail; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 4398a45a9600..ab6b36e6da15 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -156,7 +156,7 @@ static struct sock *ping_v4_lookup(struct net *net, __be32 saddr, __be32 daddr, struct hlist_nulls_node *hnode; pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", - (int)ident, &daddr, dif); + (int)ident, &daddr, dif); read_lock_bh(&ping_table.lock); ping_portaddr_for_each_entry(sk, hnode, hslot) { @@ -229,7 +229,7 @@ static int ping_init_sock(struct sock *sk) static void ping_close(struct sock *sk, long timeout) { pr_debug("ping_close(sk=%p,sk->num=%u)\n", - inet_sk(sk), inet_sk(sk)->inet_num); + inet_sk(sk), inet_sk(sk)->inet_num); pr_debug("isk->refcnt = %d\n", sk->sk_refcnt.counter); sk_common_release(sk); @@ -252,7 +252,7 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) return -EINVAL; pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n", - sk, addr->sin_addr.s_addr, ntohs(addr->sin_port)); + sk, addr->sin_addr.s_addr, ntohs(addr->sin_port)); chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) @@ -280,9 +280,9 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) } pr_debug("after bind(): num = %d, daddr = %pI4, dif = %d\n", - (int)isk->inet_num, - &isk->inet_rcv_saddr, - (int)sk->sk_bound_dev_if); + (int)isk->inet_num, + &isk->inet_rcv_saddr, + (int)sk->sk_bound_dev_if); err = 0; if (isk->inet_rcv_saddr) @@ -335,7 +335,7 @@ void ping_err(struct sk_buff *skb, u32 info) return; pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type, - code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence)); + code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence)); sk = ping_v4_lookup(net, iph->daddr, iph->saddr, ntohs(icmph->un.echo.id), skb->dev->ifindex); @@ -679,7 +679,7 @@ out: static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n", - inet_sk(sk), inet_sk(sk)->inet_num, skb); + inet_sk(sk), inet_sk(sk)->inet_num, skb); if (sock_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); pr_debug("ping_queue_rcv_skb -> failed\n"); @@ -705,7 +705,7 @@ void ping_rcv(struct sk_buff *skb) /* We assume the packet has already been checked by icmp_rcv */ pr_debug("ping_rcv(skb=%p,id=%04x,seq=%04x)\n", - skb, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence)); + skb, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence)); /* Push ICMP header back */ skb_push(skb, skb->data - (u8 *)icmph); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ab466305b629..bbd604c68e68 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -491,11 +491,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (msg->msg_namelen < sizeof(*usin)) goto out; if (usin->sin_family != AF_INET) { - static int complained; - if (!complained++) - printk(KERN_INFO "%s forgot to set AF_INET in " - "raw sendmsg. Fix it!\n", - current->comm); + pr_info_once("%s: %s forgot to set AF_INET. Fix it!\n", + __func__, current->comm); err = -EAFNOSUPPORT; if (usin->sin_family) goto out; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 815989b90dea..cbda6de0a77c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -959,7 +959,7 @@ void rt_cache_flush_batch(struct net *net) static void rt_emergency_hash_rebuild(struct net *net) { if (net_ratelimit()) - printk(KERN_WARNING "Route hash chain too long!\n"); + pr_warn("Route hash chain too long!\n"); rt_cache_invalidate(net); } @@ -1083,7 +1083,7 @@ static int rt_garbage_collect(struct dst_ops *ops) if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size) goto out; if (net_ratelimit()) - printk(KERN_WARNING "dst cache overflow\n"); + pr_warn("dst cache overflow\n"); RT_CACHE_STAT_INC(gc_dst_overflow); return 1; @@ -1181,8 +1181,7 @@ restart: int err = rt_bind_neighbour(rt); if (err) { if (net_ratelimit()) - printk(KERN_WARNING - "Neighbour table failure & not caching routes.\n"); + pr_warn("Neighbour table failure & not caching routes\n"); ip_rt_put(rt); return ERR_PTR(err); } @@ -1258,7 +1257,7 @@ restart: struct net *net = dev_net(rt->dst.dev); int num = ++net->ipv4.current_rt_cache_rebuild_count; if (!rt_caching(net)) { - printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", + pr_warn("%s: %d rebuilds is over limit, route caching disabled\n", rt->dst.dev->name, num); } rt_emergency_hash_rebuild(net); @@ -1299,7 +1298,7 @@ restart: } if (net_ratelimit()) - printk(KERN_WARNING "ipv4: Neighbour table overflow.\n"); + pr_warn("ipv4: Neighbour table overflow\n"); rt_drop(rt); return ERR_PTR(-ENOBUFS); } @@ -1503,10 +1502,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, reject_redirect: #ifdef CONFIG_IP_ROUTE_VERBOSE if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) - printk(KERN_INFO "Redirect from %pI4 on %s about %pI4 ignored.\n" + pr_info("Redirect from %pI4 on %s about %pI4 ignored\n" " Advised path = %pI4 -> %pI4\n", - &old_gw, dev->name, &new_gw, - &saddr, &daddr); + &old_gw, dev->name, &new_gw, + &saddr, &daddr); #endif ; } @@ -1618,8 +1617,8 @@ void ip_rt_send_redirect(struct sk_buff *skb) if (log_martians && peer->rate_tokens == ip_rt_redirect_number && net_ratelimit()) - printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", - &ip_hdr(skb)->saddr, rt->rt_iif, + pr_warn("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", + &ip_hdr(skb)->saddr, rt->rt_iif, &rt->rt_dst, &rt->rt_gateway); #endif } @@ -2105,18 +2104,13 @@ static void ip_handle_martian_source(struct net_device *dev, * RFC1812 recommendation, if source is martian, * the only hint is MAC header. */ - printk(KERN_WARNING "martian source %pI4 from %pI4, on dev %s\n", + pr_warn("martian source %pI4 from %pI4, on dev %s\n", &daddr, &saddr, dev->name); if (dev->hard_header_len && skb_mac_header_was_set(skb)) { - int i; - const unsigned char *p = skb_mac_header(skb); - printk(KERN_WARNING "ll header: "); - for (i = 0; i < dev->hard_header_len; i++, p++) { - printk("%02x", *p); - if (i < (dev->hard_header_len - 1)) - printk(":"); - } - printk("\n"); + print_hex_dump(KERN_WARNING, "ll header: ", + DUMP_PREFIX_OFFSET, 16, 1, + skb_mac_header(skb), + dev->hard_header_len, true); } } #endif @@ -2140,8 +2134,7 @@ static int __mkroute_input(struct sk_buff *skb, out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); if (out_dev == NULL) { if (net_ratelimit()) - printk(KERN_CRIT "Bug in ip_route_input" \ - "_slow(). Please, report\n"); + pr_crit("Bug in ip_route_input_slow(). Please report.\n"); return -EINVAL; } @@ -2413,7 +2406,7 @@ martian_destination: RT_CACHE_STAT_INC(in_martian_dst); #ifdef CONFIG_IP_ROUTE_VERBOSE if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) - printk(KERN_WARNING "martian destination %pI4 from %pI4, dev %s\n", + pr_warn("martian destination %pI4 from %pI4, dev %s\n", &daddr, &saddr, dev->name); #endif @@ -3490,7 +3483,7 @@ int __init ip_rt_init(void) net_random() % ip_rt_gc_interval + ip_rt_gc_interval); if (ip_rt_proc_init()) - printk(KERN_ERR "Unable to create route proc files\n"); + pr_err("Unable to create route proc files\n"); #ifdef CONFIG_XFRM xfrm_init(); xfrm4_init(ip_rt_max_size); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 22ef5f9fd2ff..2baba1bed810 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1675,7 +1675,7 @@ do_prequeue: if (tp->ucopy.dma_cookie < 0) { - printk(KERN_ALERT "dma_cookie < 0\n"); + pr_alert("dma_cookie < 0\n"); /* Exception. Bailout! */ if (!copied) @@ -3311,9 +3311,8 @@ void __init tcp_init(void) sysctl_tcp_rmem[1] = 87380; sysctl_tcp_rmem[2] = max(87380, max_share); - printk(KERN_INFO "TCP: Hash tables configured " - "(established %u bind %u)\n", - tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); + pr_info("TCP: Hash tables configured (established %u bind %u)\n", + tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); tcp_register_congestion_control(&tcp_reno); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index fc6d475f488f..67bab75f441f 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -41,18 +41,17 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) /* all algorithms must implement ssthresh and cong_avoid ops */ if (!ca->ssthresh || !ca->cong_avoid) { - printk(KERN_ERR "TCP %s does not implement required ops\n", - ca->name); + pr_err("TCP %s does not implement required ops\n", ca->name); return -EINVAL; } spin_lock(&tcp_cong_list_lock); if (tcp_ca_find(ca->name)) { - printk(KERN_NOTICE "TCP %s already registered\n", ca->name); + pr_notice("TCP %s already registered\n", ca->name); ret = -EEXIST; } else { list_add_tail_rcu(&ca->list, &tcp_cong_list); - printk(KERN_INFO "TCP %s registered\n", ca->name); + pr_info("TCP %s registered\n", ca->name); } spin_unlock(&tcp_cong_list_lock); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b5e315f13641..23ca3663d1e8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3867,9 +3867,9 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o opt_rx->wscale_ok = 1; if (snd_wscale > 14) { if (net_ratelimit()) - printk(KERN_INFO "tcp_parse_options: Illegal window " - "scaling value %d >14 received.\n", - snd_wscale); + pr_info("%s: Illegal window scaling value %d >14 received\n", + __func__, + snd_wscale); snd_wscale = 14; } opt_rx->snd_wscale = snd_wscale; @@ -4191,7 +4191,7 @@ static void tcp_fin(struct sock *sk) /* Only TCP_LISTEN and TCP_CLOSE are left, in these * cases we should never reach this piece of code. */ - printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n", + pr_err("%s: Impossible, sk->sk_state=%d\n", __func__, sk->sk_state); break; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 507924b640ef..257dba66eaca 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1227,10 +1227,10 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", - &iph->saddr, ntohs(th->source), - &iph->daddr, ntohs(th->dest), - genhash ? " tcp_v4_calc_md5_hash failed" : ""); + pr_info("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", + &iph->saddr, ntohs(th->source), + &iph->daddr, ntohs(th->dest), + genhash ? " tcp_v4_calc_md5_hash failed" : ""); } return 1; } diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index 01775983b997..0d0171830620 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -164,12 +164,12 @@ static const struct net_protocol tunnel64_protocol = { static int __init tunnel4_init(void) { if (inet_add_protocol(&tunnel4_protocol, IPPROTO_IPIP)) { - printk(KERN_ERR "tunnel4 init: can't add protocol\n"); + pr_err("%s: can't add protocol\n", __func__); return -EAGAIN; } #if IS_ENABLED(CONFIG_IPV6) if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) { - printk(KERN_ERR "tunnel64 init: can't add protocol\n"); + pr_err("tunnel64 init: can't add protocol\n"); inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP); return -EAGAIN; } @@ -181,10 +181,10 @@ static void __exit tunnel4_fini(void) { #if IS_ENABLED(CONFIG_IPV6) if (inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6)) - printk(KERN_ERR "tunnel64 close: can't remove protocol\n"); + pr_err("tunnel64 close: can't remove protocol\n"); #endif if (inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP)) - printk(KERN_ERR "tunnel4 close: can't remove protocol\n"); + pr_err("tunnel4 close: can't remove protocol\n"); } module_init(tunnel4_init); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 12e9499a1a6c..280bfb12adcc 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -129,11 +129,11 @@ void __init udplite4_register(void) inet_register_protosw(&udplite4_protosw); if (udplite4_proc_init()) - printk(KERN_ERR "%s: Cannot register /proc!\n", __func__); + pr_err("%s: Cannot register /proc!\n", __func__); return; out_unregister_proto: proto_unregister(&udplite_prot); out_register_err: - printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); + pr_crit("%s: Cannot add UDP-Lite protocol\n", __func__); } diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 9247d9d70e9d..5b48f49df0ba 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -75,18 +75,18 @@ static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = { static int __init ipip_init(void) { if (xfrm_register_type(&ipip_type, AF_INET) < 0) { - printk(KERN_INFO "ipip init: can't add xfrm type\n"); + pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } if (xfrm4_tunnel_register(&xfrm_tunnel_handler, AF_INET)) { - printk(KERN_INFO "ipip init: can't add xfrm handler for AF_INET\n"); + pr_info("%s: can't add xfrm handler for AF_INET\n", __func__); xfrm_unregister_type(&ipip_type, AF_INET); return -EAGAIN; } #if IS_ENABLED(CONFIG_IPV6) if (xfrm4_tunnel_register(&xfrm64_tunnel_handler, AF_INET6)) { - printk(KERN_INFO "ipip init: can't add xfrm handler for AF_INET6\n"); + pr_info("%s: can't add xfrm handler for AF_INET6\n", __func__); xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET); xfrm_unregister_type(&ipip_type, AF_INET); return -EAGAIN; @@ -99,12 +99,14 @@ static void __exit ipip_fini(void) { #if IS_ENABLED(CONFIG_IPV6) if (xfrm4_tunnel_deregister(&xfrm64_tunnel_handler, AF_INET6)) - printk(KERN_INFO "ipip close: can't remove xfrm handler for AF_INET6\n"); + pr_info("%s: can't remove xfrm handler for AF_INET6\n", + __func__); #endif if (xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET)) - printk(KERN_INFO "ipip close: can't remove xfrm handler for AF_INET\n"); + pr_info("%s: can't remove xfrm handler for AF_INET\n", + __func__); if (xfrm_unregister_type(&ipip_type, AF_INET) < 0) - printk(KERN_INFO "ipip close: can't remove xfrm type\n"); + pr_info("%s: can't remove xfrm type\n", __func__); } module_init(ipip_init); -- cgit v1.2.3 From afd465030acb4098abcb6b965a5aebc7ea2209e0 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 12 Mar 2012 07:03:32 +0000 Subject: net: ipv4: Standardize prefixes for message logging Add #define pr_fmt(fmt) as appropriate. Add "IPv4: ", "TCP: ", and "IPsec: " to appropriate files. Standardize on "UDPLite: " for appropriate uses. Some prefixes were previously "UDPLITE: " and "UDP-Lite: ". Add KBUILD_MODNAME ": " to icmp and gre. Remove embedded prefixes as appropriate. Add missing "\n" to pr_info in gre.c. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/udplite.h | 4 ++-- net/ipv4/af_inet.c | 2 ++ net/ipv4/ah4.c | 9 ++++++--- net/ipv4/esp4.c | 2 ++ net/ipv4/gre.c | 6 ++++-- net/ipv4/icmp.c | 6 ++++-- net/ipv4/ip_fragment.c | 8 +++++--- net/ipv4/ip_gre.c | 2 ++ net/ipv4/ip_input.c | 2 ++ net/ipv4/ip_options.c | 2 ++ net/ipv4/route.c | 4 +++- net/ipv4/tcp.c | 11 +++++++---- net/ipv4/tcp_cong.c | 8 +++++--- net/ipv4/tcp_input.c | 2 ++ net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv4/tcp_probe.c | 4 +++- net/ipv4/tcp_timer.c | 14 ++++++++------ net/ipv4/udp.c | 34 ++++++++++++++-------------------- net/ipv4/udplite.c | 3 +++ net/ipv4/xfrm4_tunnel.c | 2 ++ 20 files changed, 81 insertions(+), 50 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/udplite.h b/include/net/udplite.h index 5f097ca7d5c5..71375459a884 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -40,7 +40,7 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) * checksum. UDP-Lite (like IPv6) mandates checksums, hence packets * with a zero checksum field are illegal. */ if (uh->check == 0) { - LIMIT_NETDEBUG(KERN_DEBUG "UDPLITE: zeroed checksum field\n"); + LIMIT_NETDEBUG(KERN_DEBUG "UDPLite: zeroed checksum field\n"); return 1; } @@ -52,7 +52,7 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) /* * Coverage length violates RFC 3828: log and discard silently. */ - LIMIT_NETDEBUG(KERN_DEBUG "UDPLITE: bad csum coverage %d/%d\n", + LIMIT_NETDEBUG(KERN_DEBUG "UDPLite: bad csum coverage %d/%d\n", cscov, skb->len); return 1; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b7a946611f2f..fdf49fd44bb4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -65,6 +65,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) "IPv4: " fmt + #include #include #include diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index b5524660c81d..fd508b526014 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) "IPsec: " fmt + #include #include #include @@ -445,9 +447,10 @@ static int ah_init_state(struct xfrm_state *x) if (aalg_desc->uinfo.auth.icv_fullbits/8 != crypto_ahash_digestsize(ahash)) { - pr_info("AH: %s digestsize %u != %hu\n", - x->aalg->alg_name, crypto_ahash_digestsize(ahash), - aalg_desc->uinfo.auth.icv_fullbits/8); + pr_info("%s: %s digestsize %u != %hu\n", + __func__, x->aalg->alg_name, + crypto_ahash_digestsize(ahash), + aalg_desc->uinfo.auth.icv_fullbits / 8); goto error; } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 0a86dbe9454b..89a47b35905d 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) "IPsec: " fmt + #include #include #include diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 8cb1ebb7cd74..42a491055c76 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -10,6 +10,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -118,10 +120,10 @@ static const struct net_protocol net_gre_protocol = { static int __init gre_init(void) { - pr_info("GRE over IPv4 demultiplexor driver"); + pr_info("GRE over IPv4 demultiplexor driver\n"); if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { - pr_err("gre: can't add protocol\n"); + pr_err("can't add protocol\n"); return -EAGAIN; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 5c7323b7810f..9664d353ccd8 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -62,6 +62,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -670,7 +672,7 @@ static void icmp_unreach(struct sk_buff *skb) break; case ICMP_FRAG_NEEDED: if (ipv4_config.no_pmtu_disc) { - LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: fragmentation needed and DF set\n", + LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: fragmentation needed and DF set\n"), &iph->daddr); } else { info = ip_rt_frag_needed(net, iph, @@ -681,7 +683,7 @@ static void icmp_unreach(struct sk_buff *skb) } break; case ICMP_SR_FAILED: - LIMIT_NETDEBUG(KERN_INFO "ICMP: %pI4: Source Route Failed\n", + LIMIT_NETDEBUG(KERN_INFO pr_fmt("%pI4: Source Route Failed\n"), &iph->daddr); break; default: diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b2fd333abd59..3727e234c884 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -20,6 +20,8 @@ * Patrick McHardy : LRU queue of frag heads for evictor. */ +#define pr_fmt(fmt) "IPv4: " fmt + #include #include #include @@ -299,7 +301,7 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) return container_of(q, struct ipq, q); out_nomem: - LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n"); + LIMIT_NETDEBUG(KERN_ERR pr_fmt("ip_frag_create: no memory left !\n")); return NULL; } @@ -637,8 +639,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, return 0; out_nomem: - LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing " - "queue %p\n", qp); + LIMIT_NETDEBUG(KERN_ERR pr_fmt("queue_glue: no memory for gluing queue %p\n"), + qp); err = -ENOMEM; goto out_fail; out_oversize: diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 185e0a75ecec..b57532d4742c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -10,6 +10,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index de8e9be4aac8..f3f1108940f5 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -113,6 +113,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) "IPv4: " fmt + #include #include #include diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index e25ee7c8bdde..a0d0d9d9b870 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -9,6 +9,8 @@ * */ +#define pr_fmt(fmt) "IPv4: " fmt + #include #include #include diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cbda6de0a77c..12ccf880eb88 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -62,6 +62,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) "IPv4: " fmt + #include #include #include @@ -1298,7 +1300,7 @@ restart: } if (net_ratelimit()) - pr_warn("ipv4: Neighbour table overflow\n"); + pr_warn("Neighbour table overflow\n"); rt_drop(rt); return ERR_PTR(-ENOBUFS); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2baba1bed810..cfd7edda0a8e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -245,6 +245,8 @@ * TCP_CLOSE socket is finished */ +#define pr_fmt(fmt) "TCP: " fmt + #include #include #include @@ -1675,7 +1677,8 @@ do_prequeue: if (tp->ucopy.dma_cookie < 0) { - pr_alert("dma_cookie < 0\n"); + pr_alert("%s: dma_cookie < 0\n", + __func__); /* Exception. Bailout! */ if (!copied) @@ -1884,9 +1887,9 @@ bool tcp_check_oom(struct sock *sk, int shift) out_of_socket_memory = tcp_out_of_memory(sk); if (too_many_orphans && net_ratelimit()) - pr_info("TCP: too many orphaned sockets\n"); + pr_info("too many orphaned sockets\n"); if (out_of_socket_memory && net_ratelimit()) - pr_info("TCP: out of memory -- consider tuning tcp_mem\n"); + pr_info("out of memory -- consider tuning tcp_mem\n"); return too_many_orphans || out_of_socket_memory; } @@ -3311,7 +3314,7 @@ void __init tcp_init(void) sysctl_tcp_rmem[1] = 87380; sysctl_tcp_rmem[2] = max(87380, max_share); - pr_info("TCP: Hash tables configured (established %u bind %u)\n", + pr_info("Hash tables configured (established %u bind %u)\n", tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); tcp_register_congestion_control(&tcp_reno); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 67bab75f441f..272a84593c85 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -6,6 +6,8 @@ * Copyright (C) 2005 Stephen Hemminger */ +#define pr_fmt(fmt) "TCP: " fmt + #include #include #include @@ -41,17 +43,17 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) /* all algorithms must implement ssthresh and cong_avoid ops */ if (!ca->ssthresh || !ca->cong_avoid) { - pr_err("TCP %s does not implement required ops\n", ca->name); + pr_err("%s does not implement required ops\n", ca->name); return -EINVAL; } spin_lock(&tcp_cong_list_lock); if (tcp_ca_find(ca->name)) { - pr_notice("TCP %s already registered\n", ca->name); + pr_notice("%s already registered\n", ca->name); ret = -EEXIST; } else { list_add_tail_rcu(&ca->list, &tcp_cong_list); - pr_info("TCP %s registered\n", ca->name); + pr_info("%s registered\n", ca->name); } spin_unlock(&tcp_cong_list_lock); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 23ca3663d1e8..68d4057cba00 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -61,6 +61,8 @@ * Pasi Sarolahti: F-RTO for dealing with spurious RTOs */ +#define pr_fmt(fmt) "TCP: " fmt + #include #include #include diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 257dba66eaca..fe9f604ed1e2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -50,6 +50,7 @@ * a single port at the same time. */ +#define pr_fmt(fmt) "TCP: " fmt #include #include @@ -876,8 +877,7 @@ int tcp_syn_flood_action(struct sock *sk, lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; if (!lopt->synflood_warned) { lopt->synflood_warned = 1; - pr_info("%s: Possible SYN flooding on port %d. %s. " - " Check SNMP counters.\n", + pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", proto, ntohs(tcp_hdr(skb)->dest), msg); } return want_cookie; @@ -1399,7 +1399,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * to destinations, already remembered * to the moment of synflood. */ - LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n", + LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"), &saddr, ntohs(tcp_hdr(skb)->source)); goto drop_and_release; } diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 85ee7eb7e38e..a981cdc0a6e9 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -18,6 +18,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -239,7 +241,7 @@ static __init int tcpprobe_init(void) if (ret) goto err1; - pr_info("TCP probe registered (port=%d) bufsize=%u\n", port, bufsize); + pr_info("probe registered (port=%d) bufsize=%u\n", port, bufsize); return 0; err1: proc_net_remove(&init_net, procname); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index cd2e0723266d..34d4a02c2f16 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -333,16 +333,18 @@ void tcp_retransmit_timer(struct sock *sk) */ struct inet_sock *inet = inet_sk(sk); if (sk->sk_family == AF_INET) { - LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", - &inet->inet_daddr, ntohs(inet->inet_dport), - inet->inet_num, tp->snd_una, tp->snd_nxt); + LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"), + &inet->inet_daddr, + ntohs(inet->inet_dport), inet->inet_num, + tp->snd_una, tp->snd_nxt); } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", - &np->daddr, ntohs(inet->inet_dport), - inet->inet_num, tp->snd_una, tp->snd_nxt); + LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"), + &np->daddr, + ntohs(inet->inet_dport), inet->inet_num, + tp->snd_una, tp->snd_nxt); } #endif if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7c41ab84e72e..d6f5feeb3eaf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -77,6 +77,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) "UDP: " fmt + #include #include #include @@ -975,7 +977,7 @@ back_from_confirm: /* ... which is an evident application bug. --ANK */ release_sock(sk); - LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); + LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("cork app bug 2\n")); err = -EINVAL; goto out; } @@ -1054,7 +1056,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, if (unlikely(!up->pending)) { release_sock(sk); - LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); + LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("udp cork app bug 3\n")); return -EINVAL; } @@ -1447,9 +1449,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) * provided by the application." */ if (up->pcrlen == 0) { /* full coverage was set */ - LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage " - "%d while full coverage %d requested\n", - UDP_SKB_CB(skb)->cscov, skb->len); + LIMIT_NETDEBUG(KERN_WARNING "UDPLite: partial coverage %d while full coverage %d requested\n", + UDP_SKB_CB(skb)->cscov, skb->len); goto drop; } /* The next case involves violating the min. coverage requested @@ -1459,9 +1460,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) * Therefore the above ...()->partial_cov statement is essential. */ if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { - LIMIT_NETDEBUG(KERN_WARNING - "UDPLITE: coverage %d too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); + LIMIT_NETDEBUG(KERN_WARNING "UDPLite: coverage %d too small, need min %d\n", + UDP_SKB_CB(skb)->cscov, up->pcrlen); goto drop; } } @@ -1689,13 +1689,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, short_packet: LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", - proto == IPPROTO_UDPLITE ? "-Lite" : "", - &saddr, - ntohs(uh->source), - ulen, - skb->len, - &daddr, - ntohs(uh->dest)); + proto == IPPROTO_UDPLITE ? "Lite" : "", + &saddr, ntohs(uh->source), + ulen, skb->len, + &daddr, ntohs(uh->dest)); goto drop; csum_error: @@ -1704,11 +1701,8 @@ csum_error: * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", - proto == IPPROTO_UDPLITE ? "-Lite" : "", - &saddr, - ntohs(uh->source), - &daddr, - ntohs(uh->dest), + proto == IPPROTO_UDPLITE ? "Lite" : "", + &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), ulen); drop: UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 280bfb12adcc..2c46acd4cc36 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -10,6 +10,9 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ + +#define pr_fmt(fmt) "UDPLite: " fmt + #include #include "udp_impl.h" diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 5b48f49df0ba..05a5df2febc9 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -3,6 +3,8 @@ * Copyright (C) 2003 David S. Miller (davem@redhat.com) */ +#define pr_fmt(fmt) "IPsec: " fmt + #include #include #include -- cgit v1.2.3 From 124d37e9f088a8f56494b0264d63d22555f53fef Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 15 Mar 2012 05:25:58 +0000 Subject: arp: allow arp processing to honor per interface arp_accept sysctl I found recently that the arp_process function which handles all of our received arp frames, is using IPV4_DEVCONF_ALL macro to check the state of the arp_process flag. This seems wrong, as it implies that either none or all of the network interfaces accept gratuitous arps. This patch corrects that, allowing per-interface arp_accept configuration to deviate from the all setting. Note this also brings us into line with the way the arp_filter setting is handled during arp_process execution. Tested this myself on my home network, and confirmed it works as expected. Signed-off-by: Neil Horman CC: "David S. Miller" Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 1 + net/ipv4/arp.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 5f8146695b7f..597f4a9f3240 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -139,6 +139,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) IN_DEV_ORCONF((in_dev), ACCEPT_REDIRECTS))) #define IN_DEV_ARPFILTER(in_dev) IN_DEV_ORCONF((in_dev), ARPFILTER) +#define IN_DEV_ARP_ACCEPT(in_dev) IN_DEV_ORCONF((in_dev), ARP_ACCEPT) #define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) #define IN_DEV_ARP_IGNORE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_IGNORE) #define IN_DEV_ARP_NOTIFY(in_dev) IN_DEV_MAXCONF((in_dev), ARP_NOTIFY) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 63e49890ad31..73f46d691abc 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -889,7 +889,7 @@ static int arp_process(struct sk_buff *skb) n = __neigh_lookup(&arp_tbl, &sip, dev, 0); - if (IPV4_DEVCONF_ALL(dev_net(dev), ARP_ACCEPT)) { + if (IN_DEV_ARP_ACCEPT(in_dev)) { /* Unsolicited ARP is not accepted by default. It is possible, that this option should be enabled for some devices (strip is candidate) -- cgit v1.2.3 From e86b291962cbf477e35d983d312428cf737bc0f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 18 Mar 2012 11:06:44 +0000 Subject: tcp: introduce tcp_data_queue_ofo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split tcp_data_queue() in two parts for better readability. tcp_data_queue_ofo() is responsible for queueing incoming skb into out of order queue. Change code layout so that the skb_set_owner_r() is performed only if skb is not dropped. This is a preliminary patch before "reduce out_of_order memory use" following patch. Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Cc: H.K. Jerry Chu Cc: Tom Herbert Cc: Ilpo Järvinen Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 214 +++++++++++++++++++++++++++------------------------ 1 file changed, 115 insertions(+), 99 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 68d4057cba00..fa7de12c4a52 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4446,6 +4446,120 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) return 0; } +static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb1; + u32 seq, end_seq; + + TCP_ECN_check_ce(tp, skb); + + if (tcp_try_rmem_schedule(sk, skb->truesize)) { + /* TODO: should increment a counter */ + __kfree_skb(skb); + return; + } + + /* Disable header prediction. */ + tp->pred_flags = 0; + inet_csk_schedule_ack(sk); + + SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", + tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); + + skb1 = skb_peek_tail(&tp->out_of_order_queue); + if (!skb1) { + /* Initial out of order segment, build 1 SACK. */ + if (tcp_is_sack(tp)) { + tp->rx_opt.num_sacks = 1; + tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; + tp->selective_acks[0].end_seq = + TCP_SKB_CB(skb)->end_seq; + } + __skb_queue_head(&tp->out_of_order_queue, skb); + goto end; + } + + seq = TCP_SKB_CB(skb)->seq; + end_seq = TCP_SKB_CB(skb)->end_seq; + + if (seq == TCP_SKB_CB(skb1)->end_seq) { + __skb_queue_after(&tp->out_of_order_queue, skb1, skb); + + if (!tp->rx_opt.num_sacks || + tp->selective_acks[0].end_seq != seq) + goto add_sack; + + /* Common case: data arrive in order after hole. */ + tp->selective_acks[0].end_seq = end_seq; + goto end; + } + + /* Find place to insert this segment. */ + while (1) { + if (!after(TCP_SKB_CB(skb1)->seq, seq)) + break; + if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) { + skb1 = NULL; + break; + } + skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1); + } + + /* Do skb overlap to previous one? */ + if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { + if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { + /* All the bits are present. Drop. */ + __kfree_skb(skb); + skb = NULL; + tcp_dsack_set(sk, seq, end_seq); + goto add_sack; + } + if (after(seq, TCP_SKB_CB(skb1)->seq)) { + /* Partial overlap. */ + tcp_dsack_set(sk, seq, + TCP_SKB_CB(skb1)->end_seq); + } else { + if (skb_queue_is_first(&tp->out_of_order_queue, + skb1)) + skb1 = NULL; + else + skb1 = skb_queue_prev( + &tp->out_of_order_queue, + skb1); + } + } + if (!skb1) + __skb_queue_head(&tp->out_of_order_queue, skb); + else + __skb_queue_after(&tp->out_of_order_queue, skb1, skb); + + /* And clean segments covered by new one as whole. */ + while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) { + skb1 = skb_queue_next(&tp->out_of_order_queue, skb); + + if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) + break; + if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { + tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, + end_seq); + break; + } + __skb_unlink(skb1, &tp->out_of_order_queue); + tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, + TCP_SKB_CB(skb1)->end_seq); + __kfree_skb(skb1); + } + +add_sack: + if (tcp_is_sack(tp)) + tcp_sack_new_ofo_skb(sk, seq, end_seq); +end: + if (skb) + skb_set_owner_r(skb, sk); +} + + static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); @@ -4561,105 +4675,7 @@ drop: goto queue_and_out; } - TCP_ECN_check_ce(tp, skb); - - if (tcp_try_rmem_schedule(sk, skb->truesize)) - goto drop; - - /* Disable header prediction. */ - tp->pred_flags = 0; - inet_csk_schedule_ack(sk); - - SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", - tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); - - skb_set_owner_r(skb, sk); - - if (!skb_peek(&tp->out_of_order_queue)) { - /* Initial out of order segment, build 1 SACK. */ - if (tcp_is_sack(tp)) { - tp->rx_opt.num_sacks = 1; - tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; - tp->selective_acks[0].end_seq = - TCP_SKB_CB(skb)->end_seq; - } - __skb_queue_head(&tp->out_of_order_queue, skb); - } else { - struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue); - u32 seq = TCP_SKB_CB(skb)->seq; - u32 end_seq = TCP_SKB_CB(skb)->end_seq; - - if (seq == TCP_SKB_CB(skb1)->end_seq) { - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); - - if (!tp->rx_opt.num_sacks || - tp->selective_acks[0].end_seq != seq) - goto add_sack; - - /* Common case: data arrive in order after hole. */ - tp->selective_acks[0].end_seq = end_seq; - return; - } - - /* Find place to insert this segment. */ - while (1) { - if (!after(TCP_SKB_CB(skb1)->seq, seq)) - break; - if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) { - skb1 = NULL; - break; - } - skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1); - } - - /* Do skb overlap to previous one? */ - if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { - if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { - /* All the bits are present. Drop. */ - __kfree_skb(skb); - tcp_dsack_set(sk, seq, end_seq); - goto add_sack; - } - if (after(seq, TCP_SKB_CB(skb1)->seq)) { - /* Partial overlap. */ - tcp_dsack_set(sk, seq, - TCP_SKB_CB(skb1)->end_seq); - } else { - if (skb_queue_is_first(&tp->out_of_order_queue, - skb1)) - skb1 = NULL; - else - skb1 = skb_queue_prev( - &tp->out_of_order_queue, - skb1); - } - } - if (!skb1) - __skb_queue_head(&tp->out_of_order_queue, skb); - else - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); - - /* And clean segments covered by new one as whole. */ - while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) { - skb1 = skb_queue_next(&tp->out_of_order_queue, skb); - - if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) - break; - if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { - tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, - end_seq); - break; - } - __skb_unlink(skb1, &tp->out_of_order_queue); - tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, - TCP_SKB_CB(skb1)->end_seq); - __kfree_skb(skb1); - } - -add_sack: - if (tcp_is_sack(tp)) - tcp_sack_new_ofo_skb(sk, seq, end_seq); - } + tcp_data_queue_ofo(sk, skb); } static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, -- cgit v1.2.3 From c8628155ece363487b57d33441ea0359018c0fa7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 18 Mar 2012 11:07:47 +0000 Subject: tcp: reduce out_of_order memory use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With increasing receive window sizes, but speed of light not improved that much, out of order queue can contain a huge number of skbs, waiting to be moved to receive_queue when missing packets can fill the holes. Some devices happen to use fat skbs (truesize of 4096 + sizeof(struct sk_buff)) to store regular (MTU <= 1500) frames. This makes highly probable sk_rmem_alloc hits sk_rcvbuf limit, which can be 4Mbytes in many cases. When limit is hit, tcp stack calls tcp_collapse_ofo_queue(), a true latency killer and cpu cache blower. Doing the coalescing attempt each time we add a frame in ofo queue permits to keep memory use tight and in many cases avoid the tcp_collapse() thing later. Tested on various wireless setups (b43, ath9k, ...) known to use big skb truesize, this patch removed the "packets collapsed in receive queue due to low socket buffer" I had before. This also reduced average memory used by tcp sockets. With help from Neal Cardwell. Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Yuchung Cheng Cc: H.K. Jerry Chu Cc: Tom Herbert Cc: Ilpo Järvinen Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/snmp.h | 1 + net/ipv4/proc.c | 1 + net/ipv4/tcp_input.c | 19 ++++++++++++++++++- 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 8ee8af4e6da9..2e68f5ba0389 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -233,6 +233,7 @@ enum LINUX_MIB_TCPREQQFULLDOCOOKIES, /* TCPReqQFullDoCookies */ LINUX_MIB_TCPREQQFULLDROP, /* TCPReqQFullDrop */ LINUX_MIB_TCPRETRANSFAIL, /* TCPRetransFail */ + LINUX_MIB_TCPRCVCOALESCE, /* TCPRcvCoalesce */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 02d61079f08b..8af0d44e4e22 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -257,6 +257,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPReqQFullDoCookies", LINUX_MIB_TCPREQQFULLDOCOOKIES), SNMP_MIB_ITEM("TCPReqQFullDrop", LINUX_MIB_TCPREQQFULLDROP), SNMP_MIB_ITEM("TCPRetransFail", LINUX_MIB_TCPRETRANSFAIL), + SNMP_MIB_ITEM("TCPRcvCoalesce", LINUX_MIB_TCPRCVCOALESCE), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fa7de12c4a52..e886e2f7fa8d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4484,7 +4484,24 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) end_seq = TCP_SKB_CB(skb)->end_seq; if (seq == TCP_SKB_CB(skb1)->end_seq) { - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); + /* Packets in ofo can stay in queue a long time. + * Better try to coalesce them right now + * to avoid future tcp_collapse_ofo_queue(), + * probably the most expensive function in tcp stack. + */ + if (skb->len <= skb_tailroom(skb1) && !tcp_hdr(skb)->fin) { + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPRCVCOALESCE); + BUG_ON(skb_copy_bits(skb, 0, + skb_put(skb1, skb->len), + skb->len)); + TCP_SKB_CB(skb1)->end_seq = end_seq; + TCP_SKB_CB(skb1)->ack_seq = TCP_SKB_CB(skb)->ack_seq; + __kfree_skb(skb); + skb = NULL; + } else { + __skb_queue_after(&tp->out_of_order_queue, skb1, skb); + } if (!tp->rx_opt.num_sacks || tp->selective_acks[0].end_seq != seq) -- cgit v1.2.3