From 4eaa0e3c869acd5dbc7c2e3818a9ae9cbf221d27 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Apr 2010 16:13:29 -0700 Subject: fib: suppress lockdep-RCU false positive in FIB trie. Followup of commit 634a4b20 Allow tnode_get_child_rcu() to be called either under rcu_read_lock() protection or with RTNL held. Signed-off-by: Eric Dumazet Signed-off-by: Paul E. McKenney Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 59a838795e3e..c98f115fb0fd 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -209,7 +209,9 @@ static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i) { struct node *ret = tnode_get_child(tn, i); - return rcu_dereference(ret); + return rcu_dereference_check(ret, + rcu_read_lock_held() || + lockdep_rtnl_is_held()); } static inline int tnode_child_length(const struct tnode *tn) -- cgit v1.2.3 From e30b38c298b55e09456d3ccbc1df2f3e2e8dc6e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Apr 2010 09:13:03 +0000 Subject: ip: Fix ip_dev_loopback_xmit() Eric Paris got the following trace with a linux-next kernel [ 14.203970] BUG: using smp_processor_id() in preemptible [00000000] code: avahi-daemon/2093 [ 14.204025] caller is netif_rx+0xfa/0x110 [ 14.204035] Call Trace: [ 14.204064] [] debug_smp_processor_id+0x105/0x110 [ 14.204070] [] netif_rx+0xfa/0x110 [ 14.204090] [] ip_dev_loopback_xmit+0x71/0xa0 [ 14.204095] [] ip_mc_output+0x192/0x2c0 [ 14.204099] [] ip_local_out+0x20/0x30 [ 14.204105] [] ip_push_pending_frames+0x28d/0x3d0 [ 14.204119] [] udp_push_pending_frames+0x14c/0x400 [ 14.204125] [] udp_sendmsg+0x39c/0x790 [ 14.204137] [] inet_sendmsg+0x45/0x80 [ 14.204149] [] sock_sendmsg+0xf1/0x110 [ 14.204189] [] sys_sendmsg+0x20c/0x380 [ 14.204233] [] system_call_fastpath+0x16/0x1b While current linux-2.6 kernel doesn't emit this warning, the bug is latent and might cause 
unexpected failures. ip_dev_loopback_xmit() runs in process context, preemption enabled, so must call netif_rx_ni() instead of netif_rx(), to make sure that we process pending software interrupts. Same change for ip6_dev_loopback_xmit() Reported-by: Eric Paris Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 2 +- net/ipv6/ip6_output.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c65f18e0936e..d1bcc9f21d4f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -120,7 +120,7 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb) newskb->pkt_type = PACKET_LOOPBACK; newskb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(newskb)); - netif_rx(newskb); + netif_rx_ni(newskb); return 0; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 16c4391f952b..65f9c379df38 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -108,7 +108,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb) newskb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(newskb)); - netif_rx(newskb); + netif_rx_ni(newskb); return 0; } -- cgit v1.2.3 From aa395145165cb06a0d0885221bbe0ce4a564391d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Apr 2010 13:03:51 +0000 Subject: net: sk_sleep() helper Define a new function to return the waitqueue of a "struct sock". static inline wait_queue_head_t *sk_sleep(struct sock *sk) { return sk->sk_sleep; } Replace all read occurrences of sk_sleep with a call to this function. Needed for a future RCU conversion. sk_sleep won't be a directly available field. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- drivers/atm/atmtcp.c | 6 ++-- drivers/net/macvtap.c | 4 +-- drivers/net/tun.c | 4 +-- drivers/scsi/iscsi_tcp.c | 4 +-- include/net/sock.h | 10 +++++-- include/net/tcp.h | 2 +- net/atm/common.c | 12 ++++---- net/atm/signaling.c | 2 +- net/atm/svc.c | 62 ++++++++++++++++++++--------------------- net/ax25/af_ax25.c | 8 +++--- net/bluetooth/af_bluetooth.c | 6 ++-- net/bluetooth/bnep/core.c | 8 +++--- net/bluetooth/bnep/netdev.c | 6 ++-- net/bluetooth/cmtp/cmtp.h | 2 +- net/bluetooth/cmtp/core.c | 4 +-- net/bluetooth/hidp/core.c | 10 +++---- net/bluetooth/hidp/hidp.h | 4 +-- net/bluetooth/l2cap.c | 4 +-- net/bluetooth/rfcomm/sock.c | 8 +++--- net/bluetooth/sco.c | 4 +-- net/caif/caif_socket.c | 2 +- net/core/datagram.c | 6 ++-- net/core/sock.c | 16 +++++------ net/core/stream.c | 16 +++++------ net/dccp/output.c | 6 ++-- net/dccp/proto.c | 2 +- net/decnet/af_decnet.c | 26 ++++++++--------- net/ipv4/af_inet.c | 6 ++-- net/ipv4/inet_connection_sock.c | 4 +-- net/ipv4/tcp.c | 2 +- net/irda/af_irda.c | 14 +++++----- net/iucv/af_iucv.c | 12 ++++---- net/llc/af_llc.c | 12 ++++---- net/netfilter/ipvs/ip_vs_sync.c | 2 +- net/netrom/af_netrom.c | 8 +++--- net/rds/af_rds.c | 2 +- net/rds/rds.h | 2 +- net/rds/recv.c | 2 +- net/rds/send.c | 2 +- net/rose/af_rose.c | 8 +++--- net/rxrpc/af_rxrpc.c | 4 +-- net/sctp/socket.c | 20 ++++++------- net/sunrpc/svcsock.c | 24 ++++++++-------- net/tipc/socket.c | 26 ++++++++--------- net/unix/af_unix.c | 10 +++---- net/x25/af_x25.c | 8 +++--- 46 files changed, 208 insertions(+), 204 deletions(-) (limited to 'net/ipv4') diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index b86712167eb8..b9101818b47b 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -68,7 +68,7 @@ static int atmtcp_send_control(struct atm_vcc *vcc,int type, *(struct atm_vcc **) &new_msg->vcc = vcc; old_test = test_bit(flag,&vcc->flags); out_vcc->push(out_vcc,skb); - add_wait_queue(sk_atm(vcc)->sk_sleep, &wait); + 
add_wait_queue(sk_sleep(sk_atm(vcc)), &wait); while (test_bit(flag,&vcc->flags) == old_test) { mb(); out_vcc = PRIV(vcc->dev) ? PRIV(vcc->dev)->vcc : NULL; @@ -80,7 +80,7 @@ static int atmtcp_send_control(struct atm_vcc *vcc,int type, schedule(); } set_current_state(TASK_RUNNING); - remove_wait_queue(sk_atm(vcc)->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk_atm(vcc)), &wait); return error; } @@ -105,7 +105,7 @@ static int atmtcp_recv_control(const struct atmtcp_control *msg) msg->type); return -EINVAL; } - wake_up(sk_atm(vcc)->sk_sleep); + wake_up(sk_sleep(sk_atm(vcc))); return 0; } diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index abba3cc81f12..85d6420f8404 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -246,8 +246,8 @@ static void macvtap_sock_write_space(struct sock *sk) !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND); } static int macvtap_open(struct inode *inode, struct file *file) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 43265207d463..20a17938c62b 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -868,8 +868,8 @@ static void tun_sock_write_space(struct sock *sk) if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND); tun = tun_sk(sk)->tun; diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 0ee725ced511..9eae04afa9a0 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -599,9 +599,9 @@ static void 
iscsi_sw_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx); write_unlock_bh(&tcp_sw_conn->sock->sk->sk_callback_lock); - if (sock->sk->sk_sleep && waitqueue_active(sock->sk->sk_sleep)) { + if (sk_sleep(sock->sk) && waitqueue_active(sk_sleep(sock->sk))) { sock->sk->sk_err = EIO; - wake_up_interruptible(sock->sk->sk_sleep); + wake_up_interruptible(sk_sleep(sock->sk)); } iscsi_conn_stop(cls_conn, flag); diff --git a/include/net/sock.h b/include/net/sock.h index 56df440a950b..8ab05146a447 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1160,6 +1160,10 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock) sk->sk_socket = sock; } +static inline wait_queue_head_t *sk_sleep(struct sock *sk) +{ + return sk->sk_sleep; +} /* Detach socket from process context. * Announce socket dead, detach it from wait queue and inode. * Note that parent inode held reference count on this struct sock, @@ -1346,8 +1350,8 @@ static inline int sk_has_allocations(const struct sock *sk) * tp->rcv_nxt check sock_def_readable * ... { * schedule ... - * if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - * wake_up_interruptible(sk->sk_sleep) + * if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + * wake_up_interruptible(sk_sleep(sk)) * ... * } * @@ -1368,7 +1372,7 @@ static inline int sk_has_sleeper(struct sock *sk) * This memory barrier is paired in the sock_poll_wait. 
*/ smp_mb__after_lock(); - return sk->sk_sleep && waitqueue_active(sk->sk_sleep); + return sk_sleep(sk) && waitqueue_active(sk_sleep(sk)); } /** diff --git a/include/net/tcp.h b/include/net/tcp.h index 70c5159f4b36..b7d83d204a93 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -939,7 +939,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { - wake_up_interruptible_sync_poll(sk->sk_sleep, + wake_up_interruptible_sync_poll(sk_sleep(sk), POLLIN | POLLRDNORM | POLLRDBAND); if (!inet_csk_ack_scheduled(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, diff --git a/net/atm/common.c b/net/atm/common.c index 97ed94aa0cbc..e3e10e6f8628 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -92,7 +92,7 @@ static void vcc_def_wakeup(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up(sk->sk_sleep); + wake_up(sk_sleep(sk)); read_unlock(&sk->sk_callback_lock); } @@ -110,7 +110,7 @@ static void vcc_write_space(struct sock *sk) if (vcc_writable(sk)) { if (sk_has_sleeper(sk)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } @@ -549,7 +549,7 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, } eff = (size+3) & ~3; /* align to word boundary */ - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); error = 0; while (!(skb = alloc_tx(vcc, eff))) { if (m->msg_flags & MSG_DONTWAIT) { @@ -568,9 +568,9 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, send_sig(SIGPIPE, current, 0); break; } - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (error) goto out; skb->dev = NULL; /* for paths shared with net_device 
interfaces */ @@ -595,7 +595,7 @@ unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait) struct atm_vcc *vcc; unsigned int mask; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; vcc = ATM_SD(sock); diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 6ba6e466ee54..509c8ac02b63 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -131,7 +131,7 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb) } sk->sk_ack_backlog++; skb_queue_tail(&sk->sk_receive_queue, skb); - pr_debug("waking sk->sk_sleep 0x%p\n", sk->sk_sleep); + pr_debug("waking sk_sleep(sk) 0x%p\n", sk_sleep(sk)); sk->sk_state_change(sk); as_indicate_complete: release_sock(sk); diff --git a/net/atm/svc.c b/net/atm/svc.c index 3ba9a45a51ac..754ee4791d96 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -49,14 +49,14 @@ static void svc_disconnect(struct atm_vcc *vcc) pr_debug("%p\n", vcc); if (test_bit(ATM_VF_REGIS, &vcc->flags)) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc, as_close, NULL, NULL, NULL); while (!test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); } /* beware - socket is still in use by atmsigd until the last as_indicate has been answered */ @@ -125,13 +125,13 @@ static int svc_bind(struct socket *sock, struct sockaddr *sockaddr, } vcc->local = *addr; set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc, as_bind, NULL, NULL, &vcc->local); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), 
&wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); clear_bit(ATM_VF_REGIS, &vcc->flags); /* doesn't count */ if (!sigd) { error = -EUNATCH; @@ -201,10 +201,10 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, } vcc->remote = *addr; set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sigd_enq(vcc, as_connect, NULL, NULL, &vcc->remote); if (flags & O_NONBLOCK) { - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); sock->state = SS_CONNECTING; error = -EINPROGRESS; goto out; @@ -213,7 +213,7 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); if (!signal_pending(current)) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); continue; } @@ -232,14 +232,14 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, */ sigd_enq(vcc, as_close, NULL, NULL, NULL); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); schedule(); } if (!sk->sk_err) while (!test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); schedule(); } @@ -250,7 +250,7 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr, error = -EINTR; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (error) goto out; if (!sigd) { @@ -302,13 +302,13 @@ static int svc_listen(struct socket *sock, int backlog) goto out; } set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc, as_listen, NULL, NULL, 
&vcc->local); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (!sigd) { error = -EUNATCH; goto out; @@ -343,7 +343,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags) while (1) { DEFINE_WAIT(wait); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); while (!(skb = skb_dequeue(&sk->sk_receive_queue)) && sigd) { if (test_bit(ATM_VF_RELEASED, &old_vcc->flags)) @@ -363,10 +363,10 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags) error = -ERESTARTSYS; break; } - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (error) goto out; if (!skb) { @@ -392,17 +392,17 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags) } /* wait should be short, so we ignore the non-blocking flag */ set_bit(ATM_VF_WAITING, &new_vcc->flags); - prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait, TASK_UNINTERRUPTIBLE); sigd_enq(new_vcc, as_accept, old_vcc, NULL, NULL); while (test_bit(ATM_VF_WAITING, &new_vcc->flags) && sigd) { release_sock(sk); schedule(); lock_sock(sk); - prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk_atm(new_vcc)->sk_sleep, &wait); + finish_wait(sk_sleep(sk_atm(new_vcc)), &wait); if (!sigd) { error = -EUNATCH; goto out; @@ -438,14 +438,14 @@ int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos) DEFINE_WAIT(wait); set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + 
prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); sigd_enq2(vcc, as_modify, NULL, NULL, &vcc->local, qos, 0); while (test_bit(ATM_VF_WAITING, &vcc->flags) && !test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (!sigd) return -EUNATCH; return -sk->sk_err; @@ -534,20 +534,20 @@ static int svc_addparty(struct socket *sock, struct sockaddr *sockaddr, lock_sock(sk); set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sigd_enq(vcc, as_addparty, NULL, NULL, (struct sockaddr_atmsvc *) sockaddr); if (flags & O_NONBLOCK) { - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); error = -EINPROGRESS; goto out; } pr_debug("added wait queue\n"); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); error = xchg(&sk->sk_err_soft, 0); out: release_sock(sk); @@ -563,13 +563,13 @@ static int svc_dropparty(struct socket *sock, int ep_ref) lock_sock(sk); set_bit(ATM_VF_WAITING, &vcc->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sigd_enq2(vcc, as_dropparty, NULL, NULL, NULL, NULL, ep_ref); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (!sigd) { error = -EUNATCH; goto out; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 
65c5801261f9..cfdfd7e2a172 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1281,7 +1281,7 @@ static int __must_check ax25_connect(struct socket *sock, DEFINE_WAIT(wait); for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk->sk_state != TCP_SYN_SENT) break; @@ -1294,7 +1294,7 @@ static int __must_check ax25_connect(struct socket *sock, err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; @@ -1346,7 +1346,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) * hooked into the SABM we saved */ for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); skb = skb_dequeue(&sk->sk_receive_queue); if (skb) break; @@ -1364,7 +1364,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 404a8500fd03..421c45bd1b95 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -288,7 +288,7 @@ unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *w BT_DBG("sock %p, sk %p", sock, sk); - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == BT_LISTEN) return bt_accept_poll(sk); @@ -378,7 +378,7 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) BT_DBG("sk %p", sk); - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); while (sk->sk_state != state) { set_current_state(TASK_INTERRUPTIBLE); @@ -401,7 +401,7 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) break; } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + 
remove_wait_queue(sk_sleep(sk), &wait); return err; } EXPORT_SYMBOL(bt_sock_wait_state); diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 8062dad6d10d..f10b41fb05a0 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -474,7 +474,7 @@ static int bnep_session(void *arg) set_user_nice(current, -15); init_waitqueue_entry(&wait, current); - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); while (!atomic_read(&s->killed)) { set_current_state(TASK_INTERRUPTIBLE); @@ -496,7 +496,7 @@ static int bnep_session(void *arg) schedule(); } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); /* Cleanup session */ down_write(&bnep_session_sem); @@ -507,7 +507,7 @@ static int bnep_session(void *arg) /* Wakeup user-space polling for socket errors */ s->sock->sk->sk_err = EUNATCH; - wake_up_interruptible(s->sock->sk->sk_sleep); + wake_up_interruptible(sk_sleep(s->sock->sk)); /* Release the socket */ fput(s->sock->file); @@ -638,7 +638,7 @@ int bnep_del_connection(struct bnep_conndel_req *req) /* Kill session thread */ atomic_inc(&s->killed); - wake_up_interruptible(s->sock->sk->sk_sleep); + wake_up_interruptible(sk_sleep(s->sock->sk)); } else err = -ENOENT; diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index d48b33f4d4ba..0faad5ce6dc4 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -109,7 +109,7 @@ static void bnep_net_set_mc_list(struct net_device *dev) } skb_queue_tail(&sk->sk_write_queue, skb); - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); #endif } @@ -193,11 +193,11 @@ static netdev_tx_t bnep_net_xmit(struct sk_buff *skb, /* * We cannot send L2CAP packets from here as we are potentially in a bh. * So we have to queue them and wake up session thread which is sleeping - * on the sk->sk_sleep. + * on the sk_sleep(sk). 
*/ dev->trans_start = jiffies; skb_queue_tail(&sk->sk_write_queue, skb); - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); if (skb_queue_len(&sk->sk_write_queue) >= BNEP_TX_QUEUE_LEN) { BT_DBG("tx queue is full"); diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h index e4663aa14d26..785e79e953c5 100644 --- a/net/bluetooth/cmtp/cmtp.h +++ b/net/bluetooth/cmtp/cmtp.h @@ -125,7 +125,7 @@ static inline void cmtp_schedule(struct cmtp_session *session) { struct sock *sk = session->sock->sk; - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); } /* CMTP init defines */ diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 0073ec8495da..d4c6af082d48 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -284,7 +284,7 @@ static int cmtp_session(void *arg) set_user_nice(current, -15); init_waitqueue_entry(&wait, current); - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); while (!atomic_read(&session->terminate)) { set_current_state(TASK_INTERRUPTIBLE); @@ -301,7 +301,7 @@ static int cmtp_session(void *arg) schedule(); } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); down_write(&cmtp_session_sem); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 280529ad9274..bfe641b7dfaf 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -561,8 +561,8 @@ static int hidp_session(void *arg) init_waitqueue_entry(&ctrl_wait, current); init_waitqueue_entry(&intr_wait, current); - add_wait_queue(ctrl_sk->sk_sleep, &ctrl_wait); - add_wait_queue(intr_sk->sk_sleep, &intr_wait); + add_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); + add_wait_queue(sk_sleep(intr_sk), &intr_wait); while (!atomic_read(&session->terminate)) { set_current_state(TASK_INTERRUPTIBLE); @@ -584,8 +584,8 @@ static int hidp_session(void *arg) schedule(); } 
set_current_state(TASK_RUNNING); - remove_wait_queue(intr_sk->sk_sleep, &intr_wait); - remove_wait_queue(ctrl_sk->sk_sleep, &ctrl_wait); + remove_wait_queue(sk_sleep(intr_sk), &intr_wait); + remove_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); down_write(&hidp_session_sem); @@ -609,7 +609,7 @@ static int hidp_session(void *arg) fput(session->intr_sock->file); - wait_event_timeout(*(ctrl_sk->sk_sleep), + wait_event_timeout(*(sk_sleep(ctrl_sk)), (ctrl_sk->sk_state == BT_CLOSED), msecs_to_jiffies(500)); fput(session->ctrl_sock->file); diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index a4e215d50c10..8d934a19da0a 100644 --- a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h @@ -164,8 +164,8 @@ static inline void hidp_schedule(struct hidp_session *session) struct sock *ctrl_sk = session->ctrl_sock->sk; struct sock *intr_sk = session->intr_sock->sk; - wake_up_interruptible(ctrl_sk->sk_sleep); - wake_up_interruptible(intr_sk->sk_sleep); + wake_up_interruptible(sk_sleep(ctrl_sk)); + wake_up_interruptible(sk_sleep(intr_sk)); } /* HIDP init defines */ diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 99d68c34e4f1..c1e60eed5a97 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1147,7 +1147,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). 
*/ - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(nsk = bt_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { @@ -1170,7 +1170,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl } } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 8ed3c37684fa..43fbf6b4b4bf 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -503,7 +503,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). */ - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(nsk = bt_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { @@ -526,7 +526,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f } } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; @@ -621,7 +621,7 @@ static long rfcomm_sock_data_wait(struct sock *sk, long timeo) { DECLARE_WAITQUEUE(wait, current); - add_wait_queue(sk->sk_sleep, &wait); + add_wait_queue(sk_sleep(sk), &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); @@ -640,7 +640,7 @@ static long rfcomm_sock_data_wait(struct sock *sk, long timeo) } __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); return timeo; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index ca6b2ad1c3fc..b406d3eff53a 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -567,7 +567,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag BT_DBG("sk %p timeo 
%ld", sk, timeo); /* Wait for an incoming connection. (wake-one). */ - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(ch = bt_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { @@ -590,7 +590,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag } } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index cdf62b9fefac..90317e7d10b4 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -689,7 +689,7 @@ static unsigned int caif_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); u32 mask = 0; - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); lock_sock(&(cf_sk->sk)); if (!STATE_IS_OPEN(cf_sk)) { if (!STATE_IS_PENDING(cf_sk)) diff --git a/net/core/datagram.c b/net/core/datagram.c index 2dccd4ee591b..5574a5ddf908 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -86,7 +86,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) int error; DEFINE_WAIT_FUNC(wait, receiver_wake_function); - prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* Socket errors? 
*/ error = sock_error(sk); @@ -115,7 +115,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) error = 0; *timeo_p = schedule_timeout(*timeo_p); out: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return error; interrupted: error = sock_intr_errno(*timeo_p); @@ -726,7 +726,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; unsigned int mask; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* exceptional events? */ diff --git a/net/core/sock.c b/net/core/sock.c index 7effa1e689df..58ebd146ce5a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1395,7 +1395,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) if (signal_pending(current)) break; set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) break; if (sk->sk_shutdown & SEND_SHUTDOWN) @@ -1404,7 +1404,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) break; timeo = schedule_timeout(timeo); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return timeo; } @@ -1570,11 +1570,11 @@ int sk_wait_data(struct sock *sk, long *timeo) int rc; DEFINE_WAIT(wait); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return rc; } EXPORT_SYMBOL(sk_wait_data); @@ -1798,7 +1798,7 @@ static void sock_def_wakeup(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up_interruptible_all(sk->sk_sleep); + 
wake_up_interruptible_all(sk_sleep(sk)); read_unlock(&sk->sk_callback_lock); } @@ -1806,7 +1806,7 @@ static void sock_def_error_report(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up_interruptible_poll(sk->sk_sleep, POLLERR); + wake_up_interruptible_poll(sk_sleep(sk), POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); read_unlock(&sk->sk_callback_lock); } @@ -1815,7 +1815,7 @@ static void sock_def_readable(struct sock *sk, int len) { read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | + wake_up_interruptible_sync_poll(sk_sleep(sk), POLLIN | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); read_unlock(&sk->sk_callback_lock); @@ -1830,7 +1830,7 @@ static void sock_def_write_space(struct sock *sk) */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { if (sk_has_sleeper(sk)) - wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | + wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND); /* Should agree with poll, otherwise some programs break */ diff --git a/net/core/stream.c b/net/core/stream.c index a37debfeb1b2..7b3c3f30b107 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -32,8 +32,8 @@ void sk_stream_write_space(struct sock *sk) if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { clear_bit(SOCK_NOSPACE, &sock->flags); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND); if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); @@ -66,13 +66,13 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) if (signal_pending(tsk)) return sock_intr_errno(*timeo_p); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + 
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); sk->sk_write_pending++; done = sk_wait_event(sk, timeo_p, !sk->sk_err && !((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); sk->sk_write_pending--; } while (!done); return 0; @@ -96,13 +96,13 @@ void sk_stream_wait_close(struct sock *sk, long timeout) DEFINE_WAIT(wait); do { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk))) break; } while (!signal_pending(current) && timeout); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); } } @@ -126,7 +126,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) while (1) { set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; @@ -157,7 +157,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) *timeo_p = current_timeo; } out: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return err; do_error: diff --git a/net/dccp/output.c b/net/dccp/output.c index e98b65e9569f..2d3dcb39851f 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -198,7 +198,7 @@ void dccp_write_space(struct sock *sk) read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); @@ -225,7 +225,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) dccp_pr_debug("delayed send by %d msec\n", delay); jiffdelay = msecs_to_jiffies(delay); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, 
TASK_INTERRUPTIBLE); sk->sk_write_pending++; release_sock(sk); @@ -241,7 +241,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay) rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); } while ((delay = rc) > 0); out: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return rc; do_error: diff --git a/net/dccp/proto.c b/net/dccp/proto.c index a0e38d8018f5..b03ecf6b2bb0 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -312,7 +312,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock, unsigned int mask; struct sock *sk = sock->sk; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == DCCP_LISTEN) return inet_csk_listen_poll(sk); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 55e3b6b0061a..d6b93d19790f 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -832,7 +832,7 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) scp->segsize_loc = dst_metric(__sk_dst_get(sk), RTAX_ADVMSS); dn_send_conn_conf(sk, allocation); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); for(;;) { release_sock(sk); if (scp->state == DN_CC) @@ -850,9 +850,9 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) err = -EAGAIN; if (!*timeo) break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err == 0) { sk->sk_socket->state = SS_CONNECTED; } else if (scp->state != DN_CC) { @@ -873,7 +873,7 @@ static int dn_wait_run(struct sock *sk, long *timeo) if (!*timeo) return -EALREADY; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); for(;;) { release_sock(sk); if (scp->state == DN_CI || scp->state == 
DN_CC) @@ -891,9 +891,9 @@ static int dn_wait_run(struct sock *sk, long *timeo) err = -ETIMEDOUT; if (!*timeo) break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); out: if (err == 0) { sk->sk_socket->state = SS_CONNECTED; @@ -1040,7 +1040,7 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) struct sk_buff *skb = NULL; int err = 0; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); for(;;) { release_sock(sk); skb = skb_dequeue(&sk->sk_receive_queue); @@ -1060,9 +1060,9 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) err = -EAGAIN; if (!*timeo) break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return skb == NULL ? 
ERR_PTR(err) : skb; } @@ -1746,11 +1746,11 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; } - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target)); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); } skb_queue_walk_safe(queue, skb, n) { @@ -2003,12 +2003,12 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, goto out; } - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); sk_wait_event(sk, &timeo, !dn_queue_too_long(scp, queue, flags)); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); continue; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c5376c725503..5ca7290c2e61 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -548,7 +548,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo) { DEFINE_WAIT(wait); - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* Basic assumption: if someone sets sk->sk_err, he _must_ * change state of the socket from TCP_SYN_*. 
@@ -561,9 +561,9 @@ static long inet_wait_for_connect(struct sock *sk, long timeo) lock_sock(sk); if (signal_pending(current) || !timeo) break; - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return timeo; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8da6429269dd..e0a3e3537b14 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -234,7 +234,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) * having to remove and re-insert us on the wait queue. */ for (;;) { - prepare_to_wait_exclusive(sk->sk_sleep, &wait, + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); release_sock(sk); if (reqsk_queue_empty(&icsk->icsk_accept_queue)) @@ -253,7 +253,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) if (!timeo) break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return err; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0f8caf64caa3..77208334a613 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -378,7 +378,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sock *sk = sock->sk; struct tcp_sock *tp = tcp_sk(sk); - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == TCP_LISTEN) return inet_csk_listen_poll(sk); diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 2a4efcea3423..79986a674f6e 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -347,7 +347,7 @@ static void irda_flow_indication(void *instance, void *sap, LOCAL_FLOW flow) self->tx_flow = flow; IRDA_DEBUG(1, "%s(), IrTTP wants us to start again\n", __func__); - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); break; default: IRDA_DEBUG(0, "%s(), Unknown flow command!\n", 
__func__); @@ -900,7 +900,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) if (flags & O_NONBLOCK) goto out; - err = wait_event_interruptible(*(sk->sk_sleep), + err = wait_event_interruptible(*(sk_sleep(sk)), skb_peek(&sk->sk_receive_queue)); if (err) goto out; @@ -1066,7 +1066,7 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr, goto out; err = -ERESTARTSYS; - if (wait_event_interruptible(*(sk->sk_sleep), + if (wait_event_interruptible(*(sk_sleep(sk)), (sk->sk_state != TCP_SYN_SENT))) goto out; @@ -1318,7 +1318,7 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock, /* Check if IrTTP is wants us to slow down */ - if (wait_event_interruptible(*(sk->sk_sleep), + if (wait_event_interruptible(*(sk_sleep(sk)), (self->tx_flow != FLOW_STOP || sk->sk_state != TCP_ESTABLISHED))) { err = -ERESTARTSYS; goto out; @@ -1477,7 +1477,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, if (copied >= target) break; - prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* * POSIX 1003.1g mandates this order. @@ -1497,7 +1497,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, /* Wait process until data arrives */ schedule(); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out; @@ -1787,7 +1787,7 @@ static unsigned int irda_poll(struct file * file, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __func__); lock_kernel(); - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); mask = 0; /* Exceptional events? 
*/ diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c18286a2167b..9636b7d27b48 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -59,7 +59,7 @@ do { \ DEFINE_WAIT(__wait); \ long __timeo = timeo; \ ret = 0; \ - prepare_to_wait(sk->sk_sleep, &__wait, TASK_INTERRUPTIBLE); \ + prepare_to_wait(sk_sleep(sk), &__wait, TASK_INTERRUPTIBLE); \ while (!(condition)) { \ if (!__timeo) { \ ret = -EAGAIN; \ @@ -76,7 +76,7 @@ do { \ if (ret) \ break; \ } \ - finish_wait(sk->sk_sleep, &__wait); \ + finish_wait(sk_sleep(sk), &__wait); \ } while (0) #define iucv_sock_wait(sk, condition, timeo) \ @@ -307,7 +307,7 @@ static void iucv_sock_wake_msglim(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (sk_has_sleeper(sk)) - wake_up_interruptible_all(sk->sk_sleep); + wake_up_interruptible_all(sk_sleep(sk)); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); read_unlock(&sk->sk_callback_lock); } @@ -795,7 +795,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); /* Wait for an incoming connection */ - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(nsk = iucv_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { @@ -819,7 +819,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, } set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; @@ -1269,7 +1269,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; unsigned int mask = 0; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == IUCV_LISTEN) return iucv_accept_poll(sk); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 2db6a9f75913..023ba820236f 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -536,7 +536,7 @@ static int 
llc_ui_wait_for_disc(struct sock *sk, long timeout) int rc = 0; while (1) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE)) break; rc = -ERESTARTSYS; @@ -547,7 +547,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout) break; rc = 0; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return rc; } @@ -556,13 +556,13 @@ static int llc_ui_wait_for_conn(struct sock *sk, long timeout) DEFINE_WAIT(wait); while (1) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT)) break; if (signal_pending(current) || !timeout) break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return timeout; } @@ -573,7 +573,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) int rc; while (1) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); rc = 0; if (sk_wait_event(sk, &timeout, (sk->sk_shutdown & RCV_SHUTDOWN) || @@ -588,7 +588,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout) if (!timeout) break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return rc; } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 8fb0ae616761..7ba06939829f 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -802,7 +802,7 @@ static int sync_thread_backup(void *data) ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); while (!kthread_should_stop()) { - wait_event_interruptible(*tinfo->sock->sk->sk_sleep, + wait_event_interruptible(*sk_sleep(tinfo->sock->sk), !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue) || kthread_should_stop()); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c 
index fa07f044b599..06cb02796a0e 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -739,7 +739,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, DEFINE_WAIT(wait); for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk->sk_state != TCP_SYN_SENT) break; @@ -752,7 +752,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; } @@ -798,7 +798,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags) * hooked into the SABM we saved */ for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); skb = skb_dequeue(&sk->sk_receive_queue); if (skb) break; @@ -816,7 +816,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags) err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 7919a9edb8e9..aebfecbdb841 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -158,7 +158,7 @@ static unsigned int rds_poll(struct file *file, struct socket *sock, unsigned int mask = 0; unsigned long flags; - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); if (rs->rs_seen_congestion) poll_wait(file, &rds_poll_waitq, wait); diff --git a/net/rds/rds.h b/net/rds/rds.h index 4bec6e2ed495..c224b5bb3ba9 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -492,7 +492,7 @@ void rds_sock_put(struct rds_sock *rs); void rds_wake_sk_sleep(struct rds_sock *rs); static inline void __rds_wake_sk_sleep(struct sock *sk) { - wait_queue_head_t *waitq = sk->sk_sleep; + wait_queue_head_t *waitq = sk_sleep(sk); if (!sock_flag(sk, SOCK_DEAD) && waitq) wake_up(waitq); diff --git a/net/rds/recv.c 
b/net/rds/recv.c index e2a2b9344f7b..795a00b7f2cb 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -432,7 +432,7 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, break; } - timeo = wait_event_interruptible_timeout(*sk->sk_sleep, + timeo = wait_event_interruptible_timeout(*sk_sleep(sk), (!list_empty(&rs->rs_notify_queue) || rs->rs_cong_notify || rds_next_incoming(rs, &inc)), timeo); diff --git a/net/rds/send.c b/net/rds/send.c index 53d6795ac9d0..9c1c6bcaa6c9 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -915,7 +915,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, goto out; } - timeo = wait_event_interruptible_timeout(*sk->sk_sleep, + timeo = wait_event_interruptible_timeout(*sk_sleep(sk), rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port, dport, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 4fb711a035f4..8e45e76a95f5 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -845,7 +845,7 @@ rose_try_next_neigh: DEFINE_WAIT(wait); for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (sk->sk_state != TCP_SYN_SENT) break; @@ -858,7 +858,7 @@ rose_try_next_neigh: err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; @@ -911,7 +911,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags) * hooked into the SABM we saved */ for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); skb = skb_dequeue(&sk->sk_receive_queue); if (skb) @@ -930,7 +930,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags) err = -ERESTARTSYS; break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); if (err) goto out_release; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index c060095b27ce..c432d76f415e 100644 --- 
a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -65,7 +65,7 @@ static void rxrpc_write_space(struct sock *sk) read_lock(&sk->sk_callback_lock); if (rxrpc_writable(sk)) { if (sk_has_sleeper(sk)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } read_unlock(&sk->sk_callback_lock); @@ -589,7 +589,7 @@ static unsigned int rxrpc_poll(struct file *file, struct socket *sock, unsigned int mask; struct sock *sk = sock->sk; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* the socket is readable if there are any messages waiting on the Rx diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c1941276f6e3..f34adcca8a8c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5702,7 +5702,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sctp_sock *sp = sctp_sk(sk); unsigned int mask; - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); /* A TCP-style listening socket becomes readable when the accept queue * is not empty. @@ -5943,7 +5943,7 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p) int error; DEFINE_WAIT(wait); - prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); /* Socket errors? 
*/ error = sock_error(sk); @@ -5980,14 +5980,14 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p) sctp_lock_sock(sk); ready: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return 0; interrupted: error = sock_intr_errno(*timeo_p); out: - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); *err = error; return error; } @@ -6061,8 +6061,8 @@ static void __sctp_write_space(struct sctp_association *asoc) wake_up_interruptible(&asoc->wait); if (sctp_writeable(sk)) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); /* Note that we try to include the Async I/O support * here by modeling from the current TCP/UDP code. @@ -6296,7 +6296,7 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo) for (;;) { - prepare_to_wait_exclusive(sk->sk_sleep, &wait, + prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (list_empty(&ep->asocs)) { @@ -6322,7 +6322,7 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo) break; } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); return err; } @@ -6332,7 +6332,7 @@ static void sctp_wait_for_close(struct sock *sk, long timeout) DEFINE_WAIT(wait); do { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (list_empty(&sctp_sk(sk)->ep->asocs)) break; sctp_release_sock(sk); @@ -6340,7 +6340,7 @@ static void sctp_wait_for_close(struct sock *sk, long timeout) sctp_lock_sock(sk); } while (!signal_pending(current) && timeout); - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); } static void sctp_skb_set_owner_r_frag(struct sk_buff *skb, struct sock *sk) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a29f259204e6..ce0d5b35c2ac 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c 
@@ -419,8 +419,8 @@ static void svc_udp_data_ready(struct sock *sk, int count) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); } /* @@ -436,10 +436,10 @@ static void svc_write_space(struct sock *sk) svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) { dprintk("RPC svc_write_space: someone sleeping on %p\n", svsk); - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible(sk_sleep(sk)); } } @@ -757,8 +757,8 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) printk("svc: socket %p: no user data\n", sk); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_all(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible_all(sk_sleep(sk)); } /* @@ -777,8 +777,8 @@ static void svc_tcp_state_change(struct sock *sk) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_all(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible_all(sk_sleep(sk)); } static void svc_tcp_data_ready(struct sock *sk, int count) @@ -791,8 +791,8 @@ static void svc_tcp_data_ready(struct sock *sk, int count) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); } /* @@ -1494,8 +1494,8 @@ static void svc_sock_detach(struct svc_xprt *xprt) sk->sk_data_ready = svsk->sk_odata; sk->sk_write_space = svsk->sk_owspace; - if (sk->sk_sleep && 
waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); } /* diff --git a/net/tipc/socket.c b/net/tipc/socket.c index cfb20b80b3a1..66e889ba48fd 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -446,7 +446,7 @@ static unsigned int poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; u32 mask; - poll_wait(file, sk->sk_sleep, wait); + poll_wait(file, sk_sleep(sk), wait); if (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state == SS_UNCONNECTED) || @@ -591,7 +591,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, break; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), !tport->congested); lock_sock(sk); if (res) @@ -650,7 +650,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, break; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), (!tport->congested || !tport->connected)); lock_sock(sk); if (res) @@ -931,7 +931,7 @@ restart: goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state == SS_DISCONNECTING))); lock_sock(sk); @@ -1064,7 +1064,7 @@ restart: goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state == SS_DISCONNECTING))); lock_sock(sk); @@ -1271,8 +1271,8 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) tipc_disconnect_port(tipc_sk_port(sk)); } - if (waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); return TIPC_OK; } @@ -1343,8 +1343,8 @@ static void wakeupdispatch(struct tipc_port *tport) { struct 
sock *sk = (struct sock *)tport->usr_handle; - if (waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); } /** @@ -1426,7 +1426,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ release_sock(sk); - res = wait_event_interruptible_timeout(*sk->sk_sleep, + res = wait_event_interruptible_timeout(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue) || (sock->state != SS_CONNECTING)), sk->sk_rcvtimeo); @@ -1521,7 +1521,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) goto exit; } release_sock(sk); - res = wait_event_interruptible(*sk->sk_sleep, + res = wait_event_interruptible(*sk_sleep(sk), (!skb_queue_empty(&sk->sk_receive_queue))); lock_sock(sk); if (res) @@ -1632,8 +1632,8 @@ restart: /* Discard any unreceived messages; wake up sleeping tasks */ discard_rx_queue(sk); - if (waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + if (waitqueue_active(sk_sleep(sk))) + wake_up_interruptible(sk_sleep(sk)); res = 0; break; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3d9122e78f41..87c0360eaa25 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -316,7 +316,7 @@ static void unix_write_space(struct sock *sk) read_lock(&sk->sk_callback_lock); if (unix_writable(sk)) { if (sk_has_sleeper(sk)) - wake_up_interruptible_sync(sk->sk_sleep); + wake_up_interruptible_sync(sk_sleep(sk)); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } read_unlock(&sk->sk_callback_lock); @@ -1736,7 +1736,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo) unix_state_lock(sk); for (;;) { - prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (!skb_queue_empty(&sk->sk_receive_queue) || sk->sk_err || @@ -1752,7 +1752,7 @@ static long unix_stream_data_wait(struct sock 
*sk, long timeo) clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); } - finish_wait(sk->sk_sleep, &wait); + finish_wait(sk_sleep(sk), &wait); unix_state_unlock(sk); return timeo; } @@ -1991,7 +1991,7 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table struct sock *sk = sock->sk; unsigned int mask; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* exceptional events? */ @@ -2028,7 +2028,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk, *other; unsigned int mask, writable; - sock_poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* exceptional events? */ diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index cbddd0cb83f1..6cffbc4da029 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -718,7 +718,7 @@ static int x25_wait_for_connection_establishment(struct sock *sk) DECLARE_WAITQUEUE(wait, current); int rc; - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); for (;;) { __set_current_state(TASK_INTERRUPTIBLE); rc = -ERESTARTSYS; @@ -738,7 +738,7 @@ static int x25_wait_for_connection_establishment(struct sock *sk) break; } __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); return rc; } @@ -838,7 +838,7 @@ static int x25_wait_for_data(struct sock *sk, long timeout) DECLARE_WAITQUEUE(wait, current); int rc = 0; - add_wait_queue_exclusive(sk->sk_sleep, &wait); + add_wait_queue_exclusive(sk_sleep(sk), &wait); for (;;) { __set_current_state(TASK_INTERRUPTIBLE); if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -858,7 +858,7 @@ static int x25_wait_for_data(struct sock *sk, long timeout) break; } __set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); + remove_wait_queue(sk_sleep(sk), &wait); return rc; } -- cgit v1.2.3 From 0eae88f31ca2b88911ce843452054139e028771f 
Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Apr 2010 19:06:52 -0700 Subject: net: Fix various endianness glitches Sparse can help us find endianness bugs, but we need to make some cleanups to be able to more easily spot real bugs. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- net/bridge/br_private.h | 15 ++++++++------- net/ethernet/eth.c | 2 +- net/ipv4/af_inet.c | 8 ++++---- net/ipv4/ipmr.c | 10 +++++----- net/ipv4/route.c | 29 ++++++++++++++--------------- net/ipv4/tcp.c | 15 ++++++++------- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv4/tcp_output.c | 4 ++-- net/ipv4/udp.c | 8 ++++---- net/ipv6/addrconf.c | 3 ++- net/ipv6/ip6_fib.c | 3 ++- net/ipv6/tcp_ipv6.c | 4 ++-- net/ipv6/udp.c | 4 ++-- net/sched/sch_sfq.c | 10 +++++----- net/sunrpc/xprt.c | 2 +- net/xfrm/xfrm_hash.h | 3 ++- 17 files changed, 65 insertions(+), 61 deletions(-) (limited to 'net/ipv4') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 3fe86ffc069c..61e1d1094b85 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -29,7 +29,7 @@ static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, __be32 ip) { - return jhash_1word(mdb->secret, (u32)ip) & (mdb->max - 1); + return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1); } static struct net_bridge_mdb_entry *__br_mdb_ip_get( diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 791d4ab0fd4d..63181e4a2a67 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -130,19 +130,20 @@ struct net_bridge_port #endif }; +struct br_cpu_netstats { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long tx_packets; + unsigned long tx_bytes; +}; + struct net_bridge { spinlock_t lock; struct list_head port_list; struct net_device *dev; - struct br_cpu_netstats __percpu { - unsigned long rx_packets; - unsigned long rx_bytes; - unsigned long tx_packets; - unsigned long tx_bytes; - } *stats; - + struct 
br_cpu_netstats __percpu *stats; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; unsigned long feature_mask; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 205a1c12f3c0..35846964082c 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -136,7 +136,7 @@ int eth_rebuild_header(struct sk_buff *skb) default: printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n", - dev->name, (int)eth->h_proto); + dev->name, (__force int)eth->h_proto); memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); break; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5ca7290c2e61..9f52880fae10 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1323,8 +1323,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) goto out_unlock; - id = ntohl(*(u32 *)&iph->id); - flush = (u16)((ntohl(*(u32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF)); + id = ntohl(*(__be32 *)&iph->id); + flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF)); id >>= 16; for (p = *head; p; p = p->next) { @@ -1337,8 +1337,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, if ((iph->protocol ^ iph2->protocol) | (iph->tos ^ iph2->tos) | - (iph->saddr ^ iph2->saddr) | - (iph->daddr ^ iph2->daddr)) { + ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) | + ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7d8a2bcecb76..a2df5012a1d0 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1772,10 +1772,10 @@ int ip_mr_input(struct sk_buff *skb) vif = ipmr_find_vif(mrt, skb->dev); if (vif >= 0) { - int err = ipmr_cache_unresolved(mrt, vif, skb); + int err2 = ipmr_cache_unresolved(mrt, vif, skb); read_unlock(&mrt_lock); - return err; + return err2; } read_unlock(&mrt_lock); kfree_skb(skb); @@ -2227,9 +2227,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) const struct 
ipmr_mfc_iter *it = seq->private; const struct mr_table *mrt = it->mrt; - seq_printf(seq, "%08lX %08lX %-3hd", - (unsigned long) mfc->mfc_mcastgrp, - (unsigned long) mfc->mfc_origin, + seq_printf(seq, "%08X %08X %-3hd", + (__force u32) mfc->mfc_mcastgrp, + (__force u32) mfc->mfc_origin, mfc->mfc_parent); if (it->cache != &mrt->mfc_unres_queue) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cb562fdd9b9a..a947428ef0ae 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -258,10 +258,9 @@ static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); (__raw_get_cpu_var(rt_cache_stat).field++) static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, - int genid) + int genid) { - return jhash_3words((__force u32)(__be32)(daddr), - (__force u32)(__be32)(saddr), + return jhash_3words((__force u32)daddr, (__force u32)saddr, idx, genid) & rt_hash_mask; } @@ -378,12 +377,13 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) struct rtable *r = v; int len; - seq_printf(seq, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t" - "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", + seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" + "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", r->u.dst.dev ? r->u.dst.dev->name : "*", - (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway, + (__force u32)r->rt_dst, + (__force u32)r->rt_gateway, r->rt_flags, atomic_read(&r->u.dst.__refcnt), - r->u.dst.__use, 0, (unsigned long)r->rt_src, + r->u.dst.__use, 0, (__force u32)r->rt_src, (dst_metric(&r->u.dst, RTAX_ADVMSS) ? 
(int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0), dst_metric(&r->u.dst, RTAX_WINDOW), @@ -685,18 +685,17 @@ static inline bool rt_caching(const struct net *net) static inline bool compare_hash_inputs(const struct flowi *fl1, const struct flowi *fl2) { - return (__force u32)(((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | - (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) | + return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | + ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | (fl1->iif ^ fl2->iif)) == 0); } static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | - (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) | + return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | + ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | (fl1->mark ^ fl2->mark) | - (*(u16 *)&fl1->nl_u.ip4_u.tos ^ - *(u16 *)&fl2->nl_u.ip4_u.tos) | + (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) | (fl1->oif ^ fl2->oif) | (fl1->iif ^ fl2->iif)) == 0; } @@ -2319,8 +2318,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, rcu_read_lock(); for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; rth = rcu_dereference(rth->u.dst.rt_next)) { - if (((rth->fl.fl4_dst ^ daddr) | - (rth->fl.fl4_src ^ saddr) | + if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | + ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | (rth->fl.iif ^ iif) | rth->fl.oif | (rth->fl.fl4_tos ^ tos)) == 0 && diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 77208334a613..6689c61cab47 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2721,7 +2721,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct tcphdr *th2; unsigned int len; unsigned int thlen; - unsigned int flags; + __be32 flags; unsigned int mss = 1; unsigned int hlen; unsigned int off; @@ 
-2771,10 +2771,10 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) found: flush = NAPI_GRO_CB(p)->flush; - flush |= flags & TCP_FLAG_CWR; - flush |= (flags ^ tcp_flag_word(th2)) & - ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); - flush |= th->ack_seq ^ th2->ack_seq; + flush |= (__force int)(flags & TCP_FLAG_CWR); + flush |= (__force int)((flags ^ tcp_flag_word(th2)) & + ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); + flush |= (__force int)(th->ack_seq ^ th2->ack_seq); for (i = sizeof(*th); i < thlen; i += 4) flush |= *(u32 *)((u8 *)th + i) ^ *(u32 *)((u8 *)th2 + i); @@ -2795,8 +2795,9 @@ found: out_check_final: flush = len < mss; - flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | - TCP_FLAG_SYN | TCP_FLAG_FIN); + flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | + TCP_FLAG_RST | TCP_FLAG_SYN | + TCP_FLAG_FIN)); if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) pp = head; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ad08392a738c..4d6717d1e61c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1286,8 +1286,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_release; /* Secret recipe starts with IP addresses */ - *mess++ ^= daddr; - *mess++ ^= saddr; + *mess++ ^= (__force u32)daddr; + *mess++ ^= (__force u32)saddr; /* plus variable length Initiator Cookie */ c = (u8 *)mess; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2b7d71fb8439..429ad9286efc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -861,7 +861,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->urg_ptr = htons(tp->snd_up - tcb->seq); th->urg = 1; } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) { - th->urg_ptr = 0xFFFF; + th->urg_ptr = htons(0xFFFF); th->urg = 1; } } @@ -2485,7 +2485,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, *tail-- ^= TCP_SKB_CB(skb)->seq + 1; /* recommended */ - 
*tail-- ^= ((th->dest << 16) | th->source); + *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source); *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ sha_transform((__u32 *)&xvp->cookie_bakery[0], diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 666b963496ff..1e18f9cc9247 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -307,13 +307,13 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr, unsigned int port) { - return jhash_1word(saddr, net_hash_mix(net)) ^ port; + return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; } int udp_v4_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = - udp4_portaddr_hash(sock_net(sk), INADDR_ANY, snum); + udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); unsigned int hash2_partial = udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); @@ -466,14 +466,14 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, daddr, hnum, dif, hslot2, slot2); if (!result) { - hash2 = udp4_portaddr_hash(net, INADDR_ANY, hnum); + hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) goto begin; result = udp4_lib_lookup2(net, saddr, sport, - INADDR_ANY, hnum, dif, + htonl(INADDR_ANY), hnum, dif, hslot2, slot2); } rcu_read_unlock(); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7cba8845242f..34d2d649e396 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -588,7 +588,8 @@ static u32 ipv6_addr_hash(const struct in6_addr *addr) * We perform the hash function over the last 64 bits of the address * This will include the IEEE address token on links that support it. 
*/ - return jhash_2words(addr->s6_addr32[2], addr->s6_addr32[3], 0) + return jhash_2words((__force u32)addr->s6_addr32[2], + (__force u32)addr->s6_addr32[3], 0) & (IN6_ADDR_HSIZE - 1); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index dc6e0b8f260d..92a122b7795d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -144,7 +144,8 @@ static __inline__ __be32 addr_bit_set(void *token, int fn_bit) * htonl(1 << ((~fn_bit)&0x1F)) * See include/asm-generic/bitops/le.h. */ - return (1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & addr[fn_bit >> 5]; + return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & + addr[fn_bit >> 5]; } static __inline__ struct fib6_node * node_alloc(void) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bd5ef7b6e48e..a92b4a5cd8bf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1234,12 +1234,12 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; /* Secret recipe starts with IP addresses */ - d = &ipv6_hdr(skb)->daddr.s6_addr32[0]; + d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0]; *mess++ ^= *d++; *mess++ ^= *d++; *mess++ ^= *d++; *mess++ ^= *d++; - d = &ipv6_hdr(skb)->saddr.s6_addr32[0]; + d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0]; *mess++ ^= *d++; *mess++ ^= *d++; *mess++ ^= *d++; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 90824852f598..92bf9033e245 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -91,9 +91,9 @@ static unsigned int udp6_portaddr_hash(struct net *net, if (ipv6_addr_any(addr6)) hash = jhash_1word(0, mix); else if (ipv6_addr_v4mapped(addr6)) - hash = jhash_1word(addr6->s6_addr32[3], mix); + hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix); else - hash = jhash2(addr6->s6_addr32, 4, mix); + hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix); return hash ^ port; } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index c5a9ac566007..c65762823f5e 100644 --- a/net/sched/sch_sfq.c +++ 
b/net/sched/sch_sfq.c @@ -123,8 +123,8 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) case htons(ETH_P_IP): { const struct iphdr *iph = ip_hdr(skb); - h = iph->daddr; - h2 = iph->saddr ^ iph->protocol; + h = (__force u32)iph->daddr; + h2 = (__force u32)iph->saddr ^ iph->protocol; if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP || @@ -138,8 +138,8 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) case htons(ETH_P_IPV6): { struct ipv6hdr *iph = ipv6_hdr(skb); - h = iph->daddr.s6_addr32[3]; - h2 = iph->saddr.s6_addr32[3] ^ iph->nexthdr; + h = (__force u32)iph->daddr.s6_addr32[3]; + h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr; if (iph->nexthdr == IPPROTO_TCP || iph->nexthdr == IPPROTO_UDP || iph->nexthdr == IPPROTO_UDPLITE || @@ -150,7 +150,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) break; } default: - h = (unsigned long)skb_dst(skb) ^ skb->protocol; + h = (unsigned long)skb_dst(skb) ^ (__force u32)skb->protocol; h2 = (unsigned long)skb->sk; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 42f09ade0044..699ade68aac1 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -974,7 +974,7 @@ void xprt_reserve(struct rpc_task *task) static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) { - return xprt->xid++; + return (__force __be32)xprt->xid++; } static inline void xprt_init_xid(struct rpc_xprt *xprt) diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index e5195c99f71e..1396572d2ade 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -16,7 +16,8 @@ static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) { - return ntohl(daddr->a4 + saddr->a4); + u32 sum = (__force u32)daddr->a4 + (__force u32)saddr->a4; + return ntohl((__force __be32)sum); } static inline unsigned int 
__xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) -- cgit v1.2.3 From aa2ea0586d9dbe56a334d835a43b45e8c2104e77 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 22 Apr 2010 07:00:24 +0000 Subject: tcp: fix outsegs stat for TSO segments Account for TSO segments of an skb in TCP_MIB_OUTSEGS counter. Without doing this, the counter can be off by orders of magnitude from the actual number of segments sent. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/snmp.h | 2 ++ include/net/tcp.h | 1 + net/ipv4/tcp_output.c | 5 +++-- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/snmp.h b/include/net/snmp.h index 884fdbb74b23..92456f1035f5 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -133,6 +133,8 @@ struct linux_xfrm_mib { __this_cpu_add(mib[0]->mibs[field], addend) #define SNMP_ADD_STATS_USER(mib, field, addend) \ this_cpu_add(mib[1]->mibs[field], addend) +#define SNMP_ADD_STATS(mib, field, addend) \ + this_cpu_add(mib[0]->mibs[field], addend) /* * Use "__typeof__(*mib[0]) *ptr" instead of "__typeof__(mib[0]) ptr" * to make @ptr a non-percpu pointer. 
diff --git a/include/net/tcp.h b/include/net/tcp.h index b7d83d204a93..3f87fd87bc9c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -294,6 +294,7 @@ extern struct proto tcp_prot; #define TCP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.tcp_statistics, field) #define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field) #define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val) +#define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val) extern void tcp_v4_err(struct sk_buff *skb, u32); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 429ad9286efc..5db3a2c6cb33 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -888,7 +888,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcp_event_data_sent(tp, skb, sk); if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) - TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); + TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, + tcp_skb_pcount(skb)); err = icsk->icsk_af_ops->queue_xmit(skb); if (likely(err <= 0)) @@ -2503,7 +2504,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, th->window = htons(min(req->rcv_wnd, 65535U)); tcp_options_write((__be32 *)(th + 1), tp, &opts); th->doff = (tcp_header_size >> 2); - TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); + TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb)); #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ -- cgit v1.2.3 From fda48a0d7a8412cedacda46a9c0bf8ef9cd13559 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Apr 2010 09:26:15 +0000 Subject: tcp: bind() fix when many ports are bound Port autoselection done by kernel only works when number of bound sockets is under a threshold (typically 30000). 
When this threshold is over, we must check if there is a conflict before exiting first loop in inet_csk_get_port() Change inet_csk_bind_conflict() to forbid two reuse-enabled sockets to bind on same (address,port) tuple (with a non ANY address) Same change for inet6_csk_bind_conflict() Reported-by: Gaspar Chilingarov Signed-off-by: Eric Dumazet Acked-by: Evgeniy Polyakov Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 16 +++++++++++----- net/ipv6/inet6_connection_sock.c | 15 ++++++++++----- 2 files changed, 21 insertions(+), 10 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 8da6429269dd..14825eb09770 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -70,13 +70,17 @@ int inet_csk_bind_conflict(const struct sock *sk, (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { + const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); + if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr || sk2_rcv_saddr == sk_rcv_saddr) break; - } + } else if (reuse && sk2->sk_reuse && + sk2_rcv_saddr && + sk2_rcv_saddr == sk_rcv_saddr) + break; } } return node != NULL; @@ -120,9 +124,11 @@ again: smallest_size = tb->num_owners; smallest_rover = rover; if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { - spin_unlock(&head->lock); - snum = smallest_rover; - goto have_snum; + if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { + spin_unlock(&head->lock); + snum = smallest_rover; + goto have_snum; + } } } goto next; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 628db24bcf22..b4b7d40a9c95 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -42,11 +42,16 @@ int inet6_csk_bind_conflict(const struct sock *sk, if (sk != sk2 && 
(!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && - (!sk->sk_reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; + sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { + if ((!sk->sk_reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + ipv6_rcv_saddr_equal(sk, sk2)) + break; + else if (sk->sk_reuse && sk2->sk_reuse && + !ipv6_addr_any(inet6_rcv_saddr(sk2)) && + ipv6_rcv_saddr_equal(sk, sk2)) + break; + } } return node != NULL; -- cgit v1.2.3 From 3d0c9c4eb2dbdcc461be4084abd87a9a9e70f713 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 26 Apr 2010 16:02:04 +0200 Subject: net: fib_rules: mark arguments to fib_rules_register const and __net_initdata fib_rules_register() duplicates the template passed to it without modification, mark the argument as const. Additionally the templates are only needed when instantiating a new namespace, so mark them as __net_initdata, which means they can be discarded when CONFIG_NET_NS=n. 
Signed-off-by: Patrick McHardy --- include/net/fib_rules.h | 2 +- net/core/fib_rules.c | 2 +- net/decnet/dn_rules.c | 2 +- net/ipv4/fib_rules.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv6/fib6_rules.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 52bd9e6c9141..e8923bc20f9f 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -104,7 +104,7 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) return frh->table; } -extern struct fib_rules_ops *fib_rules_register(struct fib_rules_ops *, struct net *); +extern struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *, struct net *); extern void fib_rules_unregister(struct fib_rules_ops *); extern void fib_rules_cleanup_ops(struct fib_rules_ops *); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 1bc66592453c..42e84e08a1be 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -122,7 +122,7 @@ errout: } struct fib_rules_ops * -fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) +fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net) { struct fib_rules_ops *ops; int err; diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index af28dcc21844..1226bcad776b 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -216,7 +216,7 @@ static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) dn_rt_cache_flush(-1); } -static struct fib_rules_ops dn_fib_rules_ops_template = { +static const struct fib_rules_ops __net_initdata dn_fib_rules_ops_template = { .family = FIB_RULES_DECNET, .rule_size = sizeof(struct dn_fib_rule), .addr_size = sizeof(u16), diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 3ec84fea5b71..8ab62a56701c 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -245,7 +245,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops) rt_cache_flush(ops->fro_net, -1); 
} -static struct fib_rules_ops fib4_rules_ops_template = { +static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = { .family = FIB_RULES_IPV4, .rule_size = sizeof(struct fib4_rule), .addr_size = sizeof(u32), diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a2df5012a1d0..7d3e382aed64 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -216,7 +216,7 @@ static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, return 0; } -static struct fib_rules_ops ipmr_rules_ops_template = { +static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = { .family = FIB_RULES_IPMR, .rule_size = sizeof(struct ipmr_rule), .addr_size = sizeof(u32), diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 8124f16f2ac2..35f6949446f0 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -237,7 +237,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) + nla_total_size(16); /* src */ } -static struct fib_rules_ops fib6_rules_ops_template = { +static const struct fib_rules_ops __net_initdata fib6_rules_ops_template = { .family = FIB_RULES_IPV6, .rule_size = sizeof(struct fib6_rule), .addr_size = sizeof(struct in6_addr), -- cgit v1.2.3 From 25239cee7e8732dbdc9f5d324f1c22a3bdec1d1f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 26 Apr 2010 16:02:05 +0200 Subject: net: rtnetlink: decouple rtnetlink address families from real address families Decouple rtnetlink address families from real address families in socket.h to be able to add rtnetlink interfaces to code that is not a real address family without increasing AF_MAX/NPROTO. This will be used to add support for multicast route dumping from all tables as the proc interface can't be extended to support anything but the main table without breaking compatibility. This partially undoes the patch to introduce independent families for routing rules and converts ipmr routing rules to a new rtnetlink family.
Similar to that patch, values up to 127 are reserved for real address families, values above that may be used arbitrarily. Signed-off-by: Patrick McHardy --- include/linux/fib_rules.h | 8 -------- include/linux/rtnetlink.h | 6 ++++++ net/core/rtnetlink.c | 14 +++++++------- net/decnet/dn_rules.c | 2 +- net/ipv4/fib_rules.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv6/fib6_rules.c | 2 +- 7 files changed, 17 insertions(+), 19 deletions(-) (limited to 'net/ipv4') diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 04a397619ebe..51da65b68b85 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -15,14 +15,6 @@ /* try to find source address in routing lookups */ #define FIB_RULE_FIND_SADDR 0x00010000 -/* fib_rules families. values up to 127 are reserved for real address - * families, values above 128 may be used arbitrarily. - */ -#define FIB_RULES_IPV4 AF_INET -#define FIB_RULES_IPV6 AF_INET6 -#define FIB_RULES_DECNET AF_DECnet -#define FIB_RULES_IPMR 128 - struct fib_rule_hdr { __u8 family; __u8 dst_len; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index d1c7c90e9cd4..5a42c36cb6aa 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -7,6 +7,12 @@ #include #include +/* rtnetlink families. Values up to 127 are reserved for real address + * families, values above 128 may be used arbitrarily. + */ +#define RTNL_FAMILY_IPMR 128 +#define RTNL_FAMILY_MAX 128 + /**** * Routing/neighbour discovery messages. 
****/ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 78c85985cb30..fd781b62fa7f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -98,7 +98,7 @@ int lockdep_rtnl_is_held(void) EXPORT_SYMBOL(lockdep_rtnl_is_held); #endif /* #ifdef CONFIG_PROVE_LOCKING */ -static struct rtnl_link *rtnl_msg_handlers[NPROTO]; +static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; static inline int rtm_msgindex(int msgtype) { @@ -118,7 +118,7 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex) { struct rtnl_link *tab; - if (protocol < NPROTO) + if (protocol <= RTNL_FAMILY_MAX) tab = rtnl_msg_handlers[protocol]; else tab = NULL; @@ -133,7 +133,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) { struct rtnl_link *tab; - if (protocol < NPROTO) + if (protocol <= RTNL_FAMILY_MAX) tab = rtnl_msg_handlers[protocol]; else tab = NULL; @@ -167,7 +167,7 @@ int __rtnl_register(int protocol, int msgtype, struct rtnl_link *tab; int msgindex; - BUG_ON(protocol < 0 || protocol >= NPROTO); + BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); msgindex = rtm_msgindex(msgtype); tab = rtnl_msg_handlers[protocol]; @@ -219,7 +219,7 @@ int rtnl_unregister(int protocol, int msgtype) { int msgindex; - BUG_ON(protocol < 0 || protocol >= NPROTO); + BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); msgindex = rtm_msgindex(msgtype); if (rtnl_msg_handlers[protocol] == NULL) @@ -241,7 +241,7 @@ EXPORT_SYMBOL_GPL(rtnl_unregister); */ void rtnl_unregister_all(int protocol) { - BUG_ON(protocol < 0 || protocol >= NPROTO); + BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); kfree(rtnl_msg_handlers[protocol]); rtnl_msg_handlers[protocol] = NULL; @@ -1384,7 +1384,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) if (s_idx == 0) s_idx = 1; - for (idx = 1; idx < NPROTO; idx++) { + for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) { int type = cb->nlh->nlmsg_type-RTM_BASE; if (idx < s_idx || idx == PF_PACKET) continue; 
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 1226bcad776b..48fdf10be7a1 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -217,7 +217,7 @@ static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) } static const struct fib_rules_ops __net_initdata dn_fib_rules_ops_template = { - .family = FIB_RULES_DECNET, + .family = AF_DECnet, .rule_size = sizeof(struct dn_fib_rule), .addr_size = sizeof(u16), .action = dn_fib_rule_action, diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 8ab62a56701c..76daeb5ff564 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -246,7 +246,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops) } static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = { - .family = FIB_RULES_IPV4, + .family = AF_INET, .rule_size = sizeof(struct fib4_rule), .addr_size = sizeof(u32), .action = fib4_rule_action, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7d3e382aed64..41e8fc0ce8b3 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -217,7 +217,7 @@ static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, } static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = { - .family = FIB_RULES_IPMR, + .family = RTNL_FAMILY_IPMR, .rule_size = sizeof(struct ipmr_rule), .addr_size = sizeof(u32), .action = ipmr_rule_action, diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 35f6949446f0..8e44f8f9c188 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -238,7 +238,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) } static const struct fib_rules_ops __net_initdata fib6_rules_ops_template = { - .family = FIB_RULES_IPV6, + .family = AF_INET6, .rule_size = sizeof(struct fib6_rule), .addr_size = sizeof(struct in6_addr), .action = fib6_rule_action, -- cgit v1.2.3 From cb6a4e461fb427689920472bd7335f926d521747 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 26 Apr 2010 16:02:08 +0200 Subject: 
net: ipmr: add support for dumping routing tables over netlink The ipmr /proc interface (ip_mr_cache) can't be extended to dump routes from any tables but the main table in a backwards compatible fashion since the output format ends in a variable amount of output interfaces. Introduce a new netlink interface to dump multicast routes from all tables, similar to the netlink interface for regular routes. Signed-off-by: Patrick McHardy --- net/ipv4/ipmr.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 89 insertions(+), 7 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 41e8fc0ce8b3..eddfd12f55b8 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -128,8 +128,8 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt, int local); static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); -static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, - struct mfc_cache *c, struct rtmsg *rtm); +static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + struct mfc_cache *c, struct rtmsg *rtm); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES @@ -831,7 +831,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); - if (ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { + if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { nlh->nlmsg_len = (skb_tail_pointer(skb) - (u8 *)nlh); } else { @@ -1904,9 +1904,8 @@ drop: } #endif -static int -ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, - struct rtmsg *rtm) +static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + struct mfc_cache *c, struct rtmsg *rtm) { int ct; struct rtnexthop *nhp; @@ -1994,11 +1993,93 @@ int ipmr_get_route(struct net *net, if 
(!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) cache->mfc_flags |= MFC_NOTIFY; - err = ipmr_fill_mroute(mrt, skb, cache, rtm); + err = __ipmr_fill_mroute(mrt, skb, cache, rtm); read_unlock(&mrt_lock); return err; } +static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + u32 pid, u32 seq, struct mfc_cache *c) +{ + struct nlmsghdr *nlh; + struct rtmsg *rtm; + + nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); + if (nlh == NULL) + return -EMSGSIZE; + + rtm = nlmsg_data(nlh); + rtm->rtm_family = RTNL_FAMILY_IPMR; + rtm->rtm_dst_len = 32; + rtm->rtm_src_len = 32; + rtm->rtm_tos = 0; + rtm->rtm_table = mrt->id; + NLA_PUT_U32(skb, RTA_TABLE, mrt->id); + rtm->rtm_type = RTN_MULTICAST; + rtm->rtm_scope = RT_SCOPE_UNIVERSE; + rtm->rtm_protocol = RTPROT_UNSPEC; + rtm->rtm_flags = 0; + + NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin); + NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp); + + if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct mr_table *mrt; + struct mfc_cache *mfc; + unsigned int t = 0, s_t; + unsigned int h = 0, s_h; + unsigned int e = 0, s_e; + + s_t = cb->args[0]; + s_h = cb->args[1]; + s_e = cb->args[2]; + + read_lock(&mrt_lock); + ipmr_for_each_table(mrt, net) { + if (t < s_t) + goto next_table; + if (t > s_t) + s_h = 0; + for (h = s_h; h < MFC_LINES; h++) { + list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) { + if (e < s_e) + goto next_entry; + if (ipmr_fill_mroute(mrt, skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + mfc) < 0) + goto done; +next_entry: + e++; + } + e = s_e = 0; + } + s_h = 0; +next_table: + t++; + } +done: + read_unlock(&mrt_lock); + + cb->args[2] = e; + cb->args[1] = h; + cb->args[0] = t; + + return skb->len; +} + #ifdef CONFIG_PROC_FS /* * The 
/proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif @@ -2355,6 +2436,7 @@ int __init ip_mr_init(void) goto add_proto_fail; } #endif + rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute); return 0; #ifdef CONFIG_IP_PIMSM_V2 -- cgit v1.2.3 From 6c37e5de456987f5bc80879afde05aa120784095 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Mon, 26 Apr 2010 18:33:27 +0000 Subject: TCP: avoid to send keepalive probes if receiving data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RFC 1122 says the following: ... Keep-alive packets MUST only be sent when no data or acknowledgement packets have been received for the connection within an interval. ... The acknowledgement packet is resetting the keepalive timer but the data packet isn't. This patch fixes it by checking the timestamp of the last received data packet too when the keepalive timer expires. Signed-off-by: Flavio Leitner Signed-off-by: Eric Dumazet Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 8 ++++++++ net/ipv4/tcp.c | 2 +- net/ipv4/tcp_timer.c | 4 ++-- 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/tcp.h b/include/net/tcp.h index 3f87fd87bc9c..fb5c66b2ab81 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1033,6 +1033,14 @@ static inline int keepalive_probes(const struct tcp_sock *tp) return tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; } +static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) +{ + const struct inet_connection_sock *icsk = &tp->inet_conn; + + return min_t(u32, tcp_time_stamp - icsk->icsk_ack.lrcvtime, + tcp_time_stamp - tp->rcv_tstamp); +} + static inline int tcp_fin_time(const struct sock *sk) { int fin_timeout = tcp_sk(sk)->linger2 ?
: sysctl_tcp_fin_timeout; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6689c61cab47..8ce29747ad9b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2298,7 +2298,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, if (sock_flag(sk, SOCK_KEEPOPEN) && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { - __u32 elapsed = tcp_time_stamp - tp->rcv_tstamp; + u32 elapsed = keepalive_time_elapsed(tp); if (tp->keepalive_time > elapsed) elapsed = tp->keepalive_time - elapsed; else diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c732be00606b..440a5c6004f6 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -517,7 +517,7 @@ static void tcp_keepalive_timer (unsigned long data) struct sock *sk = (struct sock *) data; struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - __u32 elapsed; + u32 elapsed; /* Only process if socket is not in use. */ bh_lock_sock(sk); @@ -554,7 +554,7 @@ static void tcp_keepalive_timer (unsigned long data) if (tp->packets_out || tcp_send_head(sk)) goto resched; - elapsed = tcp_time_stamp - tp->rcv_tstamp; + elapsed = keepalive_time_elapsed(tp); if (elapsed >= keepalive_time_when(tp)) { if (icsk->icsk_probes_out >= keepalive_probes(tp)) { -- cgit v1.2.3 From c58dc01babfd58ec9e71a6ce080150dc27755d88 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Apr 2010 15:05:31 -0700 Subject: net: Make RFS socket operations not be inet specific. Idea from Eric Dumazet. As for placement inside of struct sock, I tried to choose a place that otherwise has a 32-bit hole on 64-bit systems. Signed-off-by: David S. 
Miller Acked-by: Eric Dumazet --- include/net/inet_sock.h | 37 ------------------------------------- include/net/sock.h | 38 ++++++++++++++++++++++++++++++++++++++ net/ipv4/af_inet.c | 8 ++++---- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 4 ++-- 5 files changed, 45 insertions(+), 44 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index c1d42957b86b..1653de515cee 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -102,7 +102,6 @@ struct rtable; * @uc_ttl - Unicast TTL * @inet_sport - Source port * @inet_id - ID counter for DF pkts - * @rxhash - flow hash received from netif layer * @tos - TOS * @mc_ttl - Multicasting TTL * @is_icsk - is this an inet_connection_sock? @@ -126,9 +125,6 @@ struct inet_sock { __u16 cmsg_flags; __be16 inet_sport; __u16 inet_id; -#ifdef CONFIG_RPS - __u32 rxhash; -#endif struct ip_options *opt; __u8 tos; @@ -224,37 +220,4 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk) return inet_sk(sk)->transparent ? 
FLOWI_FLAG_ANYSRC : 0; } -static inline void inet_rps_record_flow(const struct sock *sk) -{ -#ifdef CONFIG_RPS - struct rps_sock_flow_table *sock_flow_table; - - rcu_read_lock(); - sock_flow_table = rcu_dereference(rps_sock_flow_table); - rps_record_sock_flow(sock_flow_table, inet_sk(sk)->rxhash); - rcu_read_unlock(); -#endif -} - -static inline void inet_rps_reset_flow(const struct sock *sk) -{ -#ifdef CONFIG_RPS - struct rps_sock_flow_table *sock_flow_table; - - rcu_read_lock(); - sock_flow_table = rcu_dereference(rps_sock_flow_table); - rps_reset_sock_flow(sock_flow_table, inet_sk(sk)->rxhash); - rcu_read_unlock(); -#endif -} - -static inline void inet_rps_save_rxhash(struct sock *sk, u32 rxhash) -{ -#ifdef CONFIG_RPS - if (unlikely(inet_sk(sk)->rxhash != rxhash)) { - inet_rps_reset_flow(sk); - inet_sk(sk)->rxhash = rxhash; - } -#endif -} #endif /* _INET_SOCK_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 4081db86a352..07822280d953 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -198,6 +198,7 @@ struct sock_common { * @sk_rcvlowat: %SO_RCVLOWAT setting * @sk_rcvtimeo: %SO_RCVTIMEO setting * @sk_sndtimeo: %SO_SNDTIMEO setting + * @sk_rxhash: flow hash received from netif layer * @sk_filter: socket filtering instructions * @sk_protinfo: private area, net family specific, when not using slab * @sk_timer: sock cleanup timer @@ -279,6 +280,9 @@ struct sock { int sk_gso_type; unsigned int sk_gso_max_size; int sk_rcvlowat; +#ifdef CONFIG_RPS + __u32 sk_rxhash; +#endif unsigned long sk_flags; unsigned long sk_lingertime; struct sk_buff_head sk_error_queue; @@ -620,6 +624,40 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) return sk->sk_backlog_rcv(sk, skb); } +static inline void sock_rps_record_flow(const struct sock *sk) +{ +#ifdef CONFIG_RPS + struct rps_sock_flow_table *sock_flow_table; + + rcu_read_lock(); + sock_flow_table = rcu_dereference(rps_sock_flow_table); + rps_record_sock_flow(sock_flow_table, 
sk->sk_rxhash); + rcu_read_unlock(); +#endif +} + +static inline void sock_rps_reset_flow(const struct sock *sk) +{ +#ifdef CONFIG_RPS + struct rps_sock_flow_table *sock_flow_table; + + rcu_read_lock(); + sock_flow_table = rcu_dereference(rps_sock_flow_table); + rps_reset_sock_flow(sock_flow_table, sk->sk_rxhash); + rcu_read_unlock(); +#endif +} + +static inline void sock_rps_save_rxhash(struct sock *sk, u32 rxhash) +{ +#ifdef CONFIG_RPS + if (unlikely(sk->sk_rxhash != rxhash)) { + sock_rps_reset_flow(sk); + sk->sk_rxhash = rxhash; + } +#endif +} + #define sk_wait_event(__sk, __timeo, __condition) \ ({ int __rc; \ release_sock(__sk); \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 9f52880fae10..c6c43bcd1c6f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -419,7 +419,7 @@ int inet_release(struct socket *sock) if (sk) { long timeout; - inet_rps_reset_flow(sk); + sock_rps_reset_flow(sk); /* Applications forget to leave groups before exiting */ ip_mc_drop_socket(sk); @@ -722,7 +722,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, { struct sock *sk = sock->sk; - inet_rps_record_flow(sk); + sock_rps_record_flow(sk); /* We may need to bind the socket. */ if (!inet_sk(sk)->inet_num && inet_autobind(sk)) @@ -737,7 +737,7 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, { struct sock *sk = sock->sk; - inet_rps_record_flow(sk); + sock_rps_record_flow(sk); /* We may need to bind the socket. 
*/ if (!inet_sk(sk)->inet_num && inet_autobind(sk)) @@ -755,7 +755,7 @@ int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, int addr_len = 0; int err; - inet_rps_record_flow(sk); + sock_rps_record_flow(sk); err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4d6717d1e61c..771f8146a2e5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1672,7 +1672,7 @@ process: skb->dev = NULL; - inet_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb->rxhash); bh_lock_sock_nested(sk); ret = 0; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1e18f9cc9247..fa3d2874db41 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1217,7 +1217,7 @@ int udp_disconnect(struct sock *sk, int flags) sk->sk_state = TCP_CLOSE; inet->inet_daddr = 0; inet->inet_dport = 0; - inet_rps_save_rxhash(sk, 0); + sock_rps_save_rxhash(sk, 0); sk->sk_bound_dev_if = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); @@ -1262,7 +1262,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) int rc; if (inet_sk(sk)->inet_daddr) - inet_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb->rxhash); rc = sock_queue_rcv_skb(sk, skb); if (rc < 0) { -- cgit v1.2.3 From c377411f2494a931ff7facdbb3a6839b1266bcf6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Apr 2010 15:13:20 -0700 Subject: net: sk_add_backlog() take rmem_alloc into account Current socket backlog limit is not enough to really stop DDOS attacks, because the user thread spends a lot of time processing a full backlog each round, and the user might spin heavily on the socket lock. We should add backlog size and receive_queue size (aka rmem_alloc) to pace writers, and let the user run without being slowed down too much. Introduce a sk_rcvqueues_full() helper, to avoid taking socket lock in stress situations. 
Under huge stress from a multiqueue/RPS enabled NIC, a single flow udp receiver can now process ~200.000 pps (instead of ~100 pps before the patch) on a 8 core machine. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 13 +++++++++++-- net/core/sock.c | 5 ++++- net/ipv4/udp.c | 4 ++++ net/ipv6/udp.c | 8 ++++++++ net/sctp/socket.c | 3 --- 5 files changed, 27 insertions(+), 6 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/sock.h b/include/net/sock.h index 07822280d953..cf12b1e61fa6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -256,7 +256,6 @@ struct sock { struct sk_buff *head; struct sk_buff *tail; int len; - int limit; } sk_backlog; wait_queue_head_t *sk_sleep; struct dst_entry *sk_dst_cache; @@ -608,10 +607,20 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) skb->next = NULL; } +/* + * Take into account size of receive queue and backlog queue + */ +static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb) +{ + unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc); + + return qsize + skb->truesize > sk->sk_rcvbuf; +} + /* The per-socket spinlock must be held here. 
*/ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb) { - if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1)) + if (sk_rcvqueues_full(sk, skb)) return -ENOBUFS; __sk_add_backlog(sk, skb); diff --git a/net/core/sock.c b/net/core/sock.c index 58ebd146ce5a..51041759517e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -327,6 +327,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) skb->dev = NULL; + if (sk_rcvqueues_full(sk, skb)) { + atomic_inc(&sk->sk_drops); + goto discard_and_relse; + } if (nested) bh_lock_sock_nested(sk); else @@ -1885,7 +1889,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = sysctl_rmem_default; sk->sk_sndbuf = sysctl_wmem_default; - sk->sk_backlog.limit = sk->sk_rcvbuf << 1; sk->sk_state = TCP_CLOSE; sk_set_socket(sk, sock); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index fa3d2874db41..63eb56b2d873 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1372,6 +1372,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto drop; } + + if (sk_rcvqueues_full(sk, skb)) + goto drop; + rc = 0; bh_lock_sock(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2850e35cee3d..3ead20ad9d07 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -584,6 +584,10 @@ static void flush_stack(struct sock **stack, unsigned int count, sk = stack[i]; if (skb1) { + if (sk_rcvqueues_full(sk, skb)) { + kfree_skb(skb1); + goto drop; + } bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb1); @@ -759,6 +763,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, /* deliver */ + if (sk_rcvqueues_full(sk, skb)) { + sock_put(sk); + goto discard; + } bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f34adcca8a8c..13d8229f3a9c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3721,9 
+3721,6 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) SCTP_DBG_OBJCNT_INC(sock); percpu_counter_inc(&sctp_sockets_allocated); - /* Set socket backlog limit. */ - sk->sk_backlog.limit = sysctl_sctp_rmem[1]; - local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); local_bh_enable(); -- cgit v1.2.3 From 8d238b25b1ec22a73b1c2206f111df2faaff8285 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 28 Apr 2010 11:25:59 -0700 Subject: Revert "tcp: bind() fix when many ports are bound" This reverts two commits: fda48a0d7a8412cedacda46a9c0bf8ef9cd13559 tcp: bind() fix when many ports are bound and a follow-on fix for it: 6443bb1fc2050ca2b6585a3fa77f7833b55329ed ipv6: Fix inet6_csk_bind_conflict() It causes problems with binding listening sockets when time-wait sockets from a previous instance still are alive. It's too late to keep fiddling with this so late in the -rc series, and we'll deal with it in net-next-2.6 instead. Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 16 +++++----------- net/ipv6/inet6_connection_sock.c | 15 +++++---------- 2 files changed, 10 insertions(+), 21 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 14825eb09770..8da6429269dd 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -70,17 +70,13 @@ int inet_csk_bind_conflict(const struct sock *sk, (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); - if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { + const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr || sk2_rcv_saddr == sk_rcv_saddr) break; - } else if (reuse && sk2->sk_reuse && - sk2_rcv_saddr && - sk2_rcv_saddr == sk_rcv_saddr) - break; + } } } return node != NULL; @@ -124,11 +120,9 @@ again: smallest_size = tb->num_owners; smallest_rover = rover; if 
(atomic_read(&hashinfo->bsockets) > (high - low) + 1) { - if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { - spin_unlock(&head->lock); - snum = smallest_rover; - goto have_snum; - } + spin_unlock(&head->lock); + snum = smallest_rover; + goto have_snum; } } goto next; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 3a4d92b5a83e..628db24bcf22 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -42,16 +42,11 @@ int inet6_csk_bind_conflict(const struct sock *sk, if (sk != sk2 && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if ((!sk->sk_reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; - else if (sk->sk_reuse && sk2->sk_reuse && - !ipv6_addr_any(inet6_rcv_saddr(sk)) && - ipv6_rcv_saddr_equal(sk, sk2)) - break; - } + sk->sk_bound_dev_if == sk2->sk_bound_dev_if) && + (!sk->sk_reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) && + ipv6_rcv_saddr_equal(sk, sk2)) + break; } return node != NULL; -- cgit v1.2.3 From 4b0b72f7dd617b13abd1b04c947e15873e011a24 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Apr 2010 14:35:48 -0700 Subject: net: speedup udp receive path Since commit 95766fff ([UDP]: Add memory accounting.), each received packet needs one extra sock_lock()/sock_release() pair. This added latency because of possible backlog handling. Then later, ticket spinlocks added yet another latency source in case of DDOS. This patch introduces lock_sock_bh() and unlock_sock_bh() synchronization primitives, avoiding one atomic operation and backlog processing. skb_free_datagram_locked() uses them instead of full blown lock_sock()/release_sock(). skb is orphaned inside locked section for proper socket memory reclaim, and finally freed outside of it. UDP receive path now take the socket spinlock only once. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/sock.h | 10 ++++++++++ net/core/datagram.c | 10 +++++++--- net/ipv4/udp.c | 12 ++++++------ net/ipv6/udp.c | 4 ++-- 4 files changed, 25 insertions(+), 11 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/sock.h b/include/net/sock.h index cf12b1e61fa6..d361c7769fe0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1021,6 +1021,16 @@ extern void release_sock(struct sock *sk); SINGLE_DEPTH_NESTING) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) +static inline void lock_sock_bh(struct sock *sk) +{ + spin_lock_bh(&sk->sk_lock.slock); +} + +static inline void unlock_sock_bh(struct sock *sk) +{ + spin_unlock_bh(&sk->sk_lock.slock); +} + extern struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot); diff --git a/net/core/datagram.c b/net/core/datagram.c index 5574a5ddf908..95b851f3d713 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -229,9 +229,13 @@ EXPORT_SYMBOL(skb_free_datagram); void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) { - lock_sock(sk); - skb_free_datagram(sk, skb); - release_sock(sk); + lock_sock_bh(sk); + skb_orphan(skb); + sk_mem_reclaim_partial(sk); + unlock_sock_bh(sk); + + /* skb is now orphaned, might be freed outside of locked section */ + consume_skb(skb); } EXPORT_SYMBOL(skb_free_datagram_locked); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 63eb56b2d873..1f86965ba7d7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1062,10 +1062,10 @@ static unsigned int first_packet_length(struct sock *sk) spin_unlock_bh(&rcvq->lock); if (!skb_queue_empty(&list_kill)) { - lock_sock(sk); + lock_sock_bh(sk); __skb_queue_purge(&list_kill); sk_mem_reclaim_partial(sk); - release_sock(sk); + unlock_sock_bh(sk); } return res; } @@ -1196,10 +1196,10 @@ out: return err; csum_copy_err: - lock_sock(sk); + lock_sock_bh(sk); if (!skb_kill_datagram(sk, skb, flags)) UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); - 
release_sock(sk); + unlock_sock_bh(sk); if (noblock) return -EAGAIN; @@ -1624,9 +1624,9 @@ int udp_rcv(struct sk_buff *skb) void udp_destroy_sock(struct sock *sk) { - lock_sock(sk); + lock_sock_bh(sk); udp_flush_pending_frames(sk); - release_sock(sk); + unlock_sock_bh(sk); } /* diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3ead20ad9d07..91c60f0090a4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -424,7 +424,7 @@ out: return err; csum_copy_err: - lock_sock(sk); + lock_sock_bh(sk); if (!skb_kill_datagram(sk, skb, flags)) { if (is_udp4) UDP_INC_STATS_USER(sock_net(sk), @@ -433,7 +433,7 @@ csum_copy_err: UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } - release_sock(sk); + unlock_sock_bh(sk); if (flags & MSG_DONTWAIT) return -EAGAIN; -- cgit v1.2.3 From f84af32cbca70a3c6d30463dc08c7984af11c277 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Apr 2010 15:31:51 -0700 Subject: net: ip_queue_rcv_skb() helper When queueing an skb to a socket, we can immediately release its dst if the target socket does not use IP_CMSG_PKTINFO. tcp_data_queue() can drop dst too. This is to benefit from a hot cache line and to spare the receiver, possibly on another cpu, from dirtying this cache line itself. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/ip.h | 1 + net/ipv4/ip_sockglue.c | 16 ++++++++++++++++ net/ipv4/raw.c | 2 +- net/ipv4/tcp_input.c | 1 + net/ipv4/udp.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/udp.c | 2 +- 7 files changed, 22 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/ip.h b/include/net/ip.h index a84ceb692687..8149b77cea9b 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -393,6 +393,7 @@ extern int ip_options_rcv_srr(struct sk_buff *skb); * Functions provided by ip_sockglue.c */ +extern int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); extern void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb); extern int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b0aa0546a3b3..ce231780a2b1 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -954,6 +954,22 @@ e_inval: return -EINVAL; } +/** + * ip_queue_rcv_skb - Queue an skb into sock receive queue + * @sk: socket + * @skb: buffer + * + * Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option + * is not set, we drop skb dst entry now, while dst cache line is hot. + */ +int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO)) + skb_dst_drop(skb); + return sock_queue_rcv_skb(sk, skb); +} +EXPORT_SYMBOL(ip_queue_rcv_skb); + int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index cc6f097fbd5f..52ef5af78a45 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -290,7 +290,7 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) { /* Charge it to the socket. 
*/ - if (sock_queue_rcv_skb(sk, skb) < 0) { + if (ip_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ae3ec15fb630..e82162c211bf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4367,6 +4367,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) goto drop; + skb_dst_drop(skb); __skb_pull(skb, th->doff * 4); TCP_ECN_accept_cwr(tp, skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1f86965ba7d7..4560b291180b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1264,7 +1264,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (inet_sk(sk)->inet_daddr) sock_rps_save_rxhash(sk, skb->rxhash); - rc = sock_queue_rcv_skb(sk, skb); + rc = ip_queue_rcv_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 85627386cb02..0e3d2dd92078 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -381,7 +381,7 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) } /* Charge it to the socket. */ - if (sock_queue_rcv_skb(sk, skb) < 0) { + if (ip_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 91c60f0090a4..79359c8380bc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -514,7 +514,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) goto drop; } - if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { + if ((rc = ip_queue_rcv_skb(sk, skb)) < 0) { /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) UDP6_INC_STATS_BH(sock_net(sk), -- cgit v1.2.3 From 3ee943728fff536edaf8f59faa58aaa1aa7366e3 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Sat, 8 May 2010 01:57:52 -0700 Subject: ipv4: remove ip_rt_secret timer (v4) A while back there was a discussion regarding the rt_secret_interval timer. 
Given that we've had the ability to do emergency route cache rebuilds for awhile now, based on a statistical analysis of the various hash chain lengths in the cache, the use of the flush timer is somewhat redundant. This patch removes the rt_secret_interval sysctl, allowing us to rely solely on the statistical analysis mechanism to determine the need for route cache flushes. Signed-off-by: Neil Horman Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 - kernel/sysctl_binary.c | 1 - net/ipv4/route.c | 108 ++++------------------------------------------- 3 files changed, 8 insertions(+), 102 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index ae07feec6446..d68c3f121774 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -55,7 +55,6 @@ struct netns_ipv4 { int sysctl_rt_cache_rebuild_count; int current_rt_cache_rebuild_count; - struct timer_list rt_secret_timer; atomic_t rt_genid; #ifdef CONFIG_IP_MROUTE diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 59030570f5ca..937d31dc8566 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -224,7 +224,6 @@ static const struct bin_table bin_net_ipv4_route_table[] = { { CTL_INT, NET_IPV4_ROUTE_MTU_EXPIRES, "mtu_expires" }, { CTL_INT, NET_IPV4_ROUTE_MIN_PMTU, "min_pmtu" }, { CTL_INT, NET_IPV4_ROUTE_MIN_ADVMSS, "min_adv_mss" }, - { CTL_INT, NET_IPV4_ROUTE_SECRET_INTERVAL, "secret_interval" }, {} }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a947428ef0ae..dea3f9264250 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -129,7 +129,6 @@ static int ip_rt_gc_elasticity __read_mostly = 8; static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; -static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; static int rt_chain_length_max __read_mostly = 20; static struct 
delayed_work expires_work; @@ -918,32 +917,11 @@ void rt_cache_flush_batch(void) rt_do_flush(!in_softirq()); } -/* - * We change rt_genid and let gc do the cleanup - */ -static void rt_secret_rebuild(unsigned long __net) -{ - struct net *net = (struct net *)__net; - rt_cache_invalidate(net); - mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); -} - -static void rt_secret_rebuild_oneshot(struct net *net) -{ - del_timer_sync(&net->ipv4.rt_secret_timer); - rt_cache_invalidate(net); - if (ip_rt_secret_interval) - mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); -} - static void rt_emergency_hash_rebuild(struct net *net) { - if (net_ratelimit()) { + if (net_ratelimit()) printk(KERN_WARNING "Route hash chain too long!\n"); - printk(KERN_WARNING "Adjust your secret_interval!\n"); - } - - rt_secret_rebuild_oneshot(net); + rt_cache_invalidate(net); } /* @@ -3101,48 +3079,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, return -EINVAL; } -static void rt_secret_reschedule(int old) -{ - struct net *net; - int new = ip_rt_secret_interval; - int diff = new - old; - - if (!diff) - return; - - rtnl_lock(); - for_each_net(net) { - int deleted = del_timer_sync(&net->ipv4.rt_secret_timer); - long time; - - if (!new) - continue; - - if (deleted) { - time = net->ipv4.rt_secret_timer.expires - jiffies; - - if (time <= 0 || (time += diff) <= 0) - time = 0; - } else - time = new; - - mod_timer(&net->ipv4.rt_secret_timer, jiffies + time); - } - rtnl_unlock(); -} - -static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) -{ - int old = ip_rt_secret_interval; - int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); - - rt_secret_reschedule(old); - - return ret; -} - static ctl_table ipv4_route_table[] = { { .procname = "gc_thresh", @@ -3251,13 +3187,6 @@ static ctl_table ipv4_route_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - 
.procname = "secret_interval", - .data = &ip_rt_secret_interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = ipv4_sysctl_rt_secret_interval, - }, { } }; @@ -3336,34 +3265,15 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { }; #endif - -static __net_init int rt_secret_timer_init(struct net *net) +static __net_init int rt_genid_init(struct net *net) { - atomic_set(&net->ipv4.rt_genid, - (int) ((num_physpages ^ (num_physpages>>8)) ^ - (jiffies ^ (jiffies >> 7)))); - - net->ipv4.rt_secret_timer.function = rt_secret_rebuild; - net->ipv4.rt_secret_timer.data = (unsigned long)net; - init_timer_deferrable(&net->ipv4.rt_secret_timer); - - if (ip_rt_secret_interval) { - net->ipv4.rt_secret_timer.expires = - jiffies + net_random() % ip_rt_secret_interval + - ip_rt_secret_interval; - add_timer(&net->ipv4.rt_secret_timer); - } + get_random_bytes(&net->ipv4.rt_genid, + sizeof(net->ipv4.rt_genid)); return 0; } -static __net_exit void rt_secret_timer_exit(struct net *net) -{ - del_timer_sync(&net->ipv4.rt_secret_timer); -} - -static __net_initdata struct pernet_operations rt_secret_timer_ops = { - .init = rt_secret_timer_init, - .exit = rt_secret_timer_exit, +static __net_initdata struct pernet_operations rt_genid_ops = { + .init = rt_genid_init, }; @@ -3424,9 +3334,6 @@ int __init ip_rt_init(void) schedule_delayed_work(&expires_work, net_random() % ip_rt_gc_interval + ip_rt_gc_interval); - if (register_pernet_subsys(&rt_secret_timer_ops)) - printk(KERN_ERR "Unable to setup rt_secret_timer\n"); - if (ip_rt_proc_init()) printk(KERN_ERR "Unable to create route proc files\n"); #ifdef CONFIG_XFRM @@ -3438,6 +3345,7 @@ int __init ip_rt_init(void) #ifdef CONFIG_SYSCTL register_pernet_subsys(&sysctl_route_ops); #endif + register_pernet_subsys(&rt_genid_ops); return rc; } -- cgit v1.2.3