From 4185392da4b4b494e51934c51b999b4df424afba Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 6 Aug 2012 15:49:47 -0700 Subject: openvswitch: Relax set header validation. When installing a flow with an action to set a particular field we need to validate that the packets that are part of the flow actually contain that header. With IP we use zeroed addresses and with TCP/UDP the check is for zeroed ports. This check is overly broad and can catch packets like DHCP requests that have a zero source address in a legitimate header. This changes the check to look for a zeroed protocol number for IP or for both ports be zero for TCP/UDP before considering the header to not exist. Reported-by: Ethan Jackson Signed-off-by: Jesse Gross --- net/openvswitch/datapath.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d8277d29e710..cf58cedad083 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -425,10 +425,10 @@ static int validate_sample(const struct nlattr *attr, static int validate_tp_port(const struct sw_flow_key *flow_key) { if (flow_key->eth.type == htons(ETH_P_IP)) { - if (flow_key->ipv4.tp.src && flow_key->ipv4.tp.dst) + if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) return 0; } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { - if (flow_key->ipv6.tp.src && flow_key->ipv6.tp.dst) + if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) return 0; } @@ -460,7 +460,7 @@ static int validate_set(const struct nlattr *a, if (flow_key->eth.type != htons(ETH_P_IP)) return -EINVAL; - if (!flow_key->ipv4.addr.src || !flow_key->ipv4.addr.dst) + if (!flow_key->ip.proto) return -EINVAL; ipv4_key = nla_data(ovs_key); -- cgit v1.2.3 From 61a0cfb008f57ecf7eb28ee762952fb42dc15d15 Mon Sep 17 00:00:00 2001 From: Andre Guedes Date: Wed, 1 Aug 2012 20:34:15 -0300 Subject: Bluetooth: Fix use-after-free bug in SMP If SMP fails, we should always cancel security_timer delayed work. Otherwise, security_timer function may run after l2cap_conn object has been freed. This patch fixes the following warning reported by ODEBUG: WARNING: at lib/debugobjects.c:261 debug_print_object+0x7c/0x8d() Hardware name: Bochs ODEBUG: free active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x27 Modules linked in: btusb bluetooth Pid: 440, comm: kworker/u:2 Not tainted 3.5.0-rc1+ #4 Call Trace: [] ? free_obj_work+0x4a/0x7f [] warn_slowpath_common+0x7e/0x97 [] warn_slowpath_fmt+0x41/0x43 [] debug_print_object+0x7c/0x8d [] ? __queue_work+0x241/0x241 [] debug_check_no_obj_freed+0x92/0x159 [] slab_free_hook+0x6f/0x77 [] ? l2cap_conn_del+0x148/0x157 [bluetooth] [] kfree+0x59/0xac [] l2cap_conn_del+0x148/0x157 [bluetooth] [] l2cap_recv_frame+0xa77/0xfa4 [bluetooth] [] ? trace_hardirqs_on_caller+0x112/0x1ad [] l2cap_recv_acldata+0xe2/0x264 [bluetooth] [] hci_rx_work+0x235/0x33c [bluetooth] [] ? process_one_work+0x126/0x2fe [] process_one_work+0x185/0x2fe [] ? process_one_work+0x126/0x2fe [] ? lock_acquired+0x1b5/0x1cf [] ? le_scan_work+0x11d/0x11d [bluetooth] [] ? spin_lock_irq+0x9/0xb [] worker_thread+0xcf/0x175 [] ? rescuer_thread+0x175/0x175 [] kthread+0x95/0x9d [] kernel_threadi_helper+0x4/0x10 [] ? retint_restore_args+0x13/0x13 [] ? flush_kthread_worker+0xdb/0xdb [] ? gs_change+0x13/0x13 This bug can be reproduced using hctool lecc or l2test tools and bluetoothd not running. Signed-off-by: Andre Guedes Signed-off-by: Gustavo Padovan --- net/bluetooth/smp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 901a616c8083..98ffc1b6a6fa 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -267,10 +267,10 @@ static void smp_failure(struct l2cap_conn *conn, u8 reason, u8 send) mgmt_auth_failed(conn->hcon->hdev, conn->dst, hcon->type, hcon->dst_type, reason); - if (test_and_clear_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags)) { - cancel_delayed_work_sync(&conn->security_timer); + cancel_delayed_work_sync(&conn->security_timer); + + if (test_and_clear_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags)) smp_chan_destroy(conn); - } } #define JUST_WORKS 0x00 -- cgit v1.2.3 From cc110922da7e902b62d18641a370fec01a9fa794 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Thu, 23 Aug 2012 21:32:43 -0300 Subject: Bluetooth: Change signature of smp_conn_security() To make it clear that it may be called from contexts that may not have any knowledge of L2CAP, we change the connection parameter, to receive a hci_conn. This also makes it clear that it is checking the security of the link. Signed-off-by: Vinicius Costa Gomes Signed-off-by: Gustavo Padovan --- include/net/bluetooth/smp.h | 2 +- net/bluetooth/l2cap_core.c | 11 ++++++----- net/bluetooth/l2cap_sock.c | 2 +- net/bluetooth/smp.c | 4 ++-- 4 files changed, 10 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/smp.h b/include/net/bluetooth/smp.h index ca356a734920..8b27927b2a55 100644 --- a/include/net/bluetooth/smp.h +++ b/include/net/bluetooth/smp.h @@ -136,7 +136,7 @@ struct smp_chan { }; /* SMP Commands */ -int smp_conn_security(struct l2cap_conn *conn, __u8 sec_level); +int smp_conn_security(struct hci_conn *hcon, __u8 sec_level); int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb); int smp_distribute_keys(struct l2cap_conn *conn, __u8 force); int smp_user_confirm_reply(struct hci_conn *conn, u16 mgmt_op, __le32 passkey); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index daa149b7003c..4ea1710a4783 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1199,14 +1199,15 @@ clean: static void l2cap_conn_ready(struct l2cap_conn *conn) { struct l2cap_chan *chan; + struct hci_conn *hcon = conn->hcon; BT_DBG("conn %p", conn); - if (!conn->hcon->out && conn->hcon->type == LE_LINK) + if (!hcon->out && hcon->type == LE_LINK) l2cap_le_conn_ready(conn); - if (conn->hcon->out && conn->hcon->type == LE_LINK) - smp_conn_security(conn, conn->hcon->pending_sec_level); + if (hcon->out && hcon->type == LE_LINK) + smp_conn_security(hcon, hcon->pending_sec_level); mutex_lock(&conn->chan_lock); @@ -1219,8 +1220,8 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) continue; } - if (conn->hcon->type == LE_LINK) { - if (smp_conn_security(conn, chan->sec_level)) + if (hcon->type == LE_LINK) { + if (smp_conn_security(hcon, chan->sec_level)) l2cap_chan_ready(chan); } else if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) { diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index b94abd30e6f9..45cb0b0dd2c7 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -615,7 +615,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch break; } - if (smp_conn_security(conn, sec.level)) + if (smp_conn_security(conn->hcon, sec.level)) break; sk->sk_state = BT_CONFIG; chan->state = BT_CONFIG; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 98ffc1b6a6fa..8c225ef349cd 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -760,9 +760,9 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) return 0; } -int smp_conn_security(struct l2cap_conn *conn, __u8 sec_level) +int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) { - struct hci_conn *hcon = conn->hcon; + struct l2cap_conn *conn = hcon->l2cap_data; struct smp_chan *smp = conn->smp_chan; __u8 authreq; -- cgit v1.2.3 From d8343f125710fb596f7a88cd756679f14f4e77b9 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Thu, 23 Aug 2012 21:32:44 -0300 Subject: Bluetooth: Fix sending a HCI Authorization Request over LE links MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the case that the link is already in the connected state and a Pairing request arrives from the mgmt interface, hci_conn_security() would be called but it was not considering LE links. Reported-by: João Paulo Rechi Vita Signed-off-by: Vinicius Costa Gomes Signed-off-by: Gustavo Padovan --- net/bluetooth/hci_conn.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 5ad7da217474..3c094e78dde9 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -29,6 +29,7 @@ #include #include #include +#include static void hci_le_connect(struct hci_conn *conn) { @@ -619,6 +620,9 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) { BT_DBG("hcon %p", conn); + if (conn->type == LE_LINK) + return smp_conn_security(conn, sec_level); + /* For sdp we don't need the link key. */ if (sec_level == BT_SECURITY_SDP) return 1; -- cgit v1.2.3 From 39855b5ba9a72a80de96009011b7f8b2fb01612b Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 31 Aug 2012 15:28:28 -0700 Subject: openvswitch: Fix typo Signed-off-by: Joe Stringer Signed-off-by: Jesse Gross --- net/openvswitch/actions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index f3f96badf5aa..954405ceae9e 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -45,7 +45,7 @@ static int make_writable(struct sk_buff *skb, int write_len) return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); } -/* remove VLAN header from packet and update csum accrodingly. */ +/* remove VLAN header from packet and update csum accordingly. */ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) { struct vlan_hdr *vhdr; -- cgit v1.2.3 From e812347ccf9e8ce073b0ba0c49d03b124707b2b4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 2 Sep 2012 23:57:18 +0000 Subject: net: sock_edemux() should take care of timewait sockets sock_edemux() can handle either a regular socket or a timewait socket Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 8f67ced8d6a8..7f64467535d1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1523,7 +1523,12 @@ EXPORT_SYMBOL(sock_rfree); void sock_edemux(struct sk_buff *skb) { - sock_put(skb->sk); + struct sock *sk = skb->sk; + + if (sk->sk_state == TCP_TIME_WAIT) + inet_twsk_put(inet_twsk(sk)); + else + sock_put(sk); } EXPORT_SYMBOL(sock_edemux); -- cgit v1.2.3 From 4c3a5bdae293f75cdf729c6c00124e8489af2276 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 3 Sep 2012 04:27:42 +0000 Subject: sctp: Don't charge for data in sndbuf again when transmitting packet SCTP charges wmem_alloc via sctp_set_owner_w() in sctp_sendmsg() and via skb_set_owner_w() in sctp_packet_transmit(). If a sender runs out of sndbuf it will sleep in sctp_wait_for_sndbuf() and expects to be waken up by __sctp_write_space(). Buffer space charged via sctp_set_owner_w() is released in sctp_wfree() which calls __sctp_write_space() directly. Buffer space charged via skb_set_owner_w() is released via sock_wfree() which calls sk->sk_write_space() _if_ SOCK_USE_WRITE_QUEUE is not set. sctp_endpoint_init() sets SOCK_USE_WRITE_QUEUE on all sockets. Therefore if sctp_packet_transmit() manages to queue up more than sndbuf bytes, sctp_wait_for_sndbuf() will never be woken up again unless it is interrupted by a signal. This could be fixed by clearing the SOCK_USE_WRITE_QUEUE flag but ... Charging for the data twice does not make sense in the first place, it leads to overcharging sndbuf by a factor 2. Therefore this patch only charges a single byte in wmem_alloc when transmitting an SCTP packet to ensure that the socket stays alive until the packet has been released. This means that control chunks are no longer accounted for in wmem_alloc which I believe is not a problem as skb->truesize will typically lead to overcharging anyway and thus compensates for any control overhead. Signed-off-by: Thomas Graf CC: Vlad Yasevich CC: Neil Horman CC: David Miller Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/output.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/output.c b/net/sctp/output.c index 838e18b4d7ea..be50aa234dcd 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -364,6 +364,25 @@ finish: return retval; } +static void sctp_packet_release_owner(struct sk_buff *skb) +{ + sk_free(skb->sk); +} + +static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk) +{ + skb_orphan(skb); + skb->sk = sk; + skb->destructor = sctp_packet_release_owner; + + /* + * The data chunks have already been accounted for in sctp_sendmsg(), + * therefore only reserve a single byte to keep socket around until + * the packet has been transmitted. + */ + atomic_inc(&sk->sk_wmem_alloc); +} + /* All packets are sent to the network through this function from * sctp_outq_tail(). * @@ -405,7 +424,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) /* Set the owning socket so that we know where to get the * destination IP address. */ - skb_set_owner_w(nskb, sk); + sctp_packet_set_owner_w(nskb, sk); if (!sctp_transport_dst_check(tp)) { sctp_transport_route(tp, NULL, sctp_sk(sk)); -- cgit v1.2.3 From b379135c40163ae79ba7a54e6928b53983e74ee8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 1 Sep 2012 03:19:57 +0000 Subject: fq_codel: dont reinit flow state When fq_codel builds a new flow, it should not reset codel state. Codel algo needs to get previous values (lastcount, drop_next) to get proper behavior. Signed-off-by: Dave Taht Signed-off-by: Eric Dumazet Acked-by: Dave Taht Signed-off-by: David S. Miller --- net/sched/sch_fq_codel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 9fc1c62ec80e..4e606fcb2534 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -191,7 +191,6 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (list_empty(&flow->flowchain)) { list_add_tail(&flow->flowchain, &q->new_flows); - codel_vars_init(&flow->cvars); q->new_flow_count++; flow->deficit = q->quantum; flow->dropped = 0; @@ -418,6 +417,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) struct fq_codel_flow *flow = q->flows + i; INIT_LIST_HEAD(&flow->flowchain); + codel_vars_init(&flow->cvars); } } if (sch->limit >= 1) -- cgit v1.2.3 From c303aa94cdae483a7577230e61720e126e600a52 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 3 Sep 2012 19:06:27 -0700 Subject: openvswitch: Fix FLOW_BUFSIZE definition. The vlan encapsulation fields in the maximum flow defintion were never updated when the representation changed before upstreaming. In theory this could cause a kernel panic when a maximum length flow is used. In practice this has never happened (to my knowledge) because skb allocations are padded out to a cache line so you would need the right combination of flow and packet being sent to userspace. Signed-off-by: Jesse Gross --- net/openvswitch/flow.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 9b75617ca4e0..c30df1a10c67 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -145,15 +145,17 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 + * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) * OVS_KEY_ATTR_8021Q 4 -- 4 8 - * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 + * OVS_KEY_ATTR_ENCAP 0 -- 4 4 (VLAN encapsulation) + * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (inner VLAN ethertype) * OVS_KEY_ATTR_IPV6 40 -- 4 44 * OVS_KEY_ATTR_ICMPV6 2 2 4 8 * OVS_KEY_ATTR_ND 28 -- 4 32 * ------------------------------------------------- - * total 132 + * total 144 */ -#define FLOW_BUFSIZE 132 +#define FLOW_BUFSIZE 144 int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, -- cgit v1.2.3 From 3d2abdfdf14f4d6decc2023708211e19b096f4ca Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Tue, 4 Sep 2012 17:44:45 +0300 Subject: mac80211: clear bssid on auth/assoc failure ifmgd->bssid wasn't cleared properly in some auth/assoc failure cases, causing mac80211 and the low-level driver to go out of sync. Clear ifmgd->bssid on failure, and notify the driver. Cc: stable@kernel.org # 3.4+ Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a4a5acdbaa4d..f76b83341cf9 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3248,6 +3248,8 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, goto out_unlock; err_clear: + memset(ifmgd->bssid, 0, ETH_ALEN); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); ifmgd->auth_data = NULL; err_free: kfree(auth_data); @@ -3439,6 +3441,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, err = 0; goto out; err_clear: + memset(ifmgd->bssid, 0, ETH_ALEN); + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); ifmgd->assoc_data = NULL; err_free: kfree(assoc_data); -- cgit v1.2.3 From b4e4f47e940bc93c5b1125a4429ff53956754800 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 2 Sep 2012 21:41:04 +0800 Subject: nl80211: fix possible memory leak nl80211_connect() connkeys is malloced in nl80211_parse_connkeys() and should be freed in the error handling case, otherwise it will cause memory leak. spatch with a semantic match is used to found this problem. (http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 97026f3b215a..1e37dbf00cb3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5633,8 +5633,10 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) sizeof(connect.ht_capa_mask)); if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) { - if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) + if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) { + kfree(connkeys); return -EINVAL; + } memcpy(&connect.ht_capa, nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]), sizeof(connect.ht_capa)); -- cgit v1.2.3 From 6cf5c951175abcec4da470c50565cc0afe6cd11d Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 4 Sep 2012 04:13:18 +0000 Subject: netrom: copy_datagram_iovec can fail Check for an error from this and if so bail properly. Signed-off-by: Alan Cox Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 06592d8b4a2b..1b9024ee963c 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1169,7 +1169,12 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_flags |= MSG_TRUNC; } - skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + er = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + if (er < 0) { + skb_free_datagram(sk, skb); + release_sock(sk); + return er; + } if (sax != NULL) { sax->sax25_family = AF_NETROM; -- cgit v1.2.3 From 37159ef2c1ae1e696b24b260b241209a19f92c60 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Sep 2012 07:18:57 +0000 Subject: l2tp: fix a lockdep splat Fixes following lockdep splat : [ 1614.734896] ============================================= [ 1614.734898] [ INFO: possible recursive locking detected ] [ 1614.734901] 3.6.0-rc3+ #782 Not tainted [ 1614.734903] --------------------------------------------- [ 1614.734905] swapper/11/0 is trying to acquire lock: [ 1614.734907] (slock-AF_INET){+.-...}, at: [] l2tp_xmit_skb+0x172/0xa50 [l2tp_core] [ 1614.734920] [ 1614.734920] but task is already holding lock: [ 1614.734922] (slock-AF_INET){+.-...}, at: [] tcp_v4_err+0x163/0x6b0 [ 1614.734932] [ 1614.734932] other info that might help us debug this: [ 1614.734935] Possible unsafe locking scenario: [ 1614.734935] [ 1614.734937] CPU0 [ 1614.734938] ---- [ 1614.734940] lock(slock-AF_INET); [ 1614.734943] lock(slock-AF_INET); [ 1614.734946] [ 1614.734946] *** DEADLOCK *** [ 1614.734946] [ 1614.734949] May be due to missing lock nesting notation [ 1614.734949] [ 1614.734952] 7 locks held by swapper/11/0: [ 1614.734954] #0: (rcu_read_lock){.+.+..}, at: [] __netif_receive_skb+0x251/0xd00 [ 1614.734964] #1: (rcu_read_lock){.+.+..}, at: [] ip_local_deliver_finish+0x4c/0x4e0 [ 1614.734972] #2: (rcu_read_lock){.+.+..}, at: [] icmp_socket_deliver+0x46/0x230 [ 1614.734982] #3: (slock-AF_INET){+.-...}, at: [] tcp_v4_err+0x163/0x6b0 [ 1614.734989] #4: (rcu_read_lock){.+.+..}, at: [] ip_queue_xmit+0x0/0x680 [ 1614.734997] #5: (rcu_read_lock_bh){.+....}, at: [] ip_finish_output+0x135/0x890 [ 1614.735004] #6: (rcu_read_lock_bh){.+....}, at: [] dev_queue_xmit+0x0/0xe00 [ 1614.735012] [ 1614.735012] stack backtrace: [ 1614.735016] Pid: 0, comm: swapper/11 Not tainted 3.6.0-rc3+ #782 [ 1614.735018] Call Trace: [ 1614.735020] [] __lock_acquire+0x144c/0x1b10 [ 1614.735033] [] ? check_usage+0x9b/0x4d0 [ 1614.735037] [] ? mark_held_locks+0x82/0x130 [ 1614.735042] [] lock_acquire+0x90/0x200 [ 1614.735047] [] ? l2tp_xmit_skb+0x172/0xa50 [l2tp_core] [ 1614.735051] [] ? trace_hardirqs_on+0xd/0x10 [ 1614.735060] [] _raw_spin_lock+0x41/0x50 [ 1614.735065] [] ? l2tp_xmit_skb+0x172/0xa50 [l2tp_core] [ 1614.735069] [] l2tp_xmit_skb+0x172/0xa50 [l2tp_core] [ 1614.735075] [] l2tp_eth_dev_xmit+0x32/0x60 [l2tp_eth] [ 1614.735079] [] dev_hard_start_xmit+0x502/0xa70 [ 1614.735083] [] ? dev_hard_start_xmit+0x5e/0xa70 [ 1614.735087] [] ? dev_queue_xmit+0x141/0xe00 [ 1614.735093] [] sch_direct_xmit+0xfe/0x290 [ 1614.735098] [] dev_queue_xmit+0x1e5/0xe00 [ 1614.735102] [] ? dev_hard_start_xmit+0xa70/0xa70 [ 1614.735106] [] ? eth_header+0x3a/0xf0 [ 1614.735111] [] ? fib_get_table+0x2e/0x280 [ 1614.735117] [] arp_xmit+0x22/0x60 [ 1614.735121] [] arp_send+0x43/0x50 [ 1614.735125] [] arp_solicit+0x18f/0x450 [ 1614.735132] [] neigh_probe+0x4a/0x70 [ 1614.735137] [] __neigh_event_send+0xea/0x300 [ 1614.735141] [] neigh_resolve_output+0x163/0x260 [ 1614.735146] [] ip_finish_output+0x505/0x890 [ 1614.735150] [] ? ip_finish_output+0x135/0x890 [ 1614.735154] [] ip_output+0x59/0xf0 [ 1614.735158] [] ip_local_out+0x2d/0xa0 [ 1614.735162] [] ip_queue_xmit+0x1c3/0x680 [ 1614.735165] [] ? ip_local_out+0xa0/0xa0 [ 1614.735172] [] tcp_transmit_skb+0x402/0xa60 [ 1614.735177] [] tcp_retransmit_skb+0x1a1/0x620 [ 1614.735181] [] tcp_retransmit_timer+0x393/0x960 [ 1614.735185] [] ? tcp_v4_err+0x163/0x6b0 [ 1614.735189] [] tcp_v4_err+0x657/0x6b0 [ 1614.735194] [] ? icmp_socket_deliver+0x46/0x230 [ 1614.735199] [] icmp_socket_deliver+0xce/0x230 [ 1614.735203] [] ? icmp_socket_deliver+0x46/0x230 [ 1614.735208] [] icmp_unreach+0xe4/0x2c0 [ 1614.735213] [] icmp_rcv+0x350/0x4a0 [ 1614.735217] [] ip_local_deliver_finish+0x135/0x4e0 [ 1614.735221] [] ? ip_local_deliver_finish+0x4c/0x4e0 [ 1614.735225] [] ip_local_deliver+0x4a/0x90 [ 1614.735229] [] ip_rcv_finish+0x187/0x730 [ 1614.735233] [] ip_rcv+0x21d/0x300 [ 1614.735237] [] __netif_receive_skb+0x46b/0xd00 [ 1614.735241] [] ? __netif_receive_skb+0x251/0xd00 [ 1614.735245] [] process_backlog+0xb8/0x180 [ 1614.735249] [] net_rx_action+0x159/0x330 [ 1614.735257] [] __do_softirq+0xd0/0x3e0 [ 1614.735264] [] ? tick_program_event+0x24/0x30 [ 1614.735270] [] call_softirq+0x1c/0x30 [ 1614.735278] [] do_softirq+0x8d/0xc0 [ 1614.735282] [] irq_exit+0xae/0xe0 [ 1614.735287] [] smp_apic_timer_interrupt+0x6e/0x99 [ 1614.735291] [] apic_timer_interrupt+0x6c/0x80 [ 1614.735293] [] ? trace_hardirqs_off+0xd/0x10 [ 1614.735306] [] ? intel_idle+0xf5/0x150 [ 1614.735310] [] ? intel_idle+0xee/0x150 [ 1614.735317] [] cpuidle_enter+0x19/0x20 [ 1614.735321] [] cpuidle_idle_call+0xa8/0x630 [ 1614.735327] [] cpu_idle+0x8a/0xe0 [ 1614.735333] [] start_secondary+0x220/0x222 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 513cab08a986..1a9f3723c13c 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1501,6 +1501,8 @@ out: return err; } +static struct lock_class_key l2tp_socket_class; + int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp) { struct l2tp_tunnel *tunnel = NULL; @@ -1605,6 +1607,8 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 tunnel->old_sk_destruct = sk->sk_destruct; sk->sk_destruct = &l2tp_tunnel_destruct; tunnel->sock = sk; + lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock"); + sk->sk_allocation = GFP_ATOMIC; /* Add tunnel to our list */ -- cgit v1.2.3 From 3b59df46a449ec9975146d71318c4777ad086744 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 4 Sep 2012 00:03:29 +0000 Subject: xfrm: Workaround incompatibility of ESN and async crypto ESN for esp is defined in RFC 4303. This RFC assumes that the sequence number counters are always up to date. However, this is not true if an async crypto algorithm is employed. If the sequence number counters are not up to date on sequence number check, we may incorrectly update the upper 32 bit of the sequence number. This leads to a DOS. We workaround this by comparing the upper sequence number, (used for authentication) with the upper sequence number computed after the async processing. We drop the packet if these numbers are different. To do this, we introduce a recheck function that does this check in the ESN case. Signed-off-by: Steffen Klassert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 +++ net/xfrm/xfrm_input.c | 2 +- net/xfrm/xfrm_replay.c | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 976a81abe1a2..639dd1316d37 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -273,6 +273,9 @@ struct xfrm_replay { int (*check)(struct xfrm_state *x, struct sk_buff *skb, __be32 net_seq); + int (*recheck)(struct xfrm_state *x, + struct sk_buff *skb, + __be32 net_seq); void (*notify)(struct xfrm_state *x, int event); int (*overflow)(struct xfrm_state *x, struct sk_buff *skb); }; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 54a0dc2e2f8d..ab2bb42fe094 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -212,7 +212,7 @@ resume: /* only the first xfrm gets the encap type */ encap_type = 0; - if (async && x->repl->check(x, skb, seq)) { + if (async && x->repl->recheck(x, skb, seq)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 2f6d11d04a2b..3efb07d3eb27 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -420,6 +420,18 @@ err: return -EINVAL; } +static int xfrm_replay_recheck_esn(struct xfrm_state *x, + struct sk_buff *skb, __be32 net_seq) +{ + if (unlikely(XFRM_SKB_CB(skb)->seq.input.hi != + htonl(xfrm_replay_seqhi(x, net_seq)))) { + x->stats.replay_window++; + return -EINVAL; + } + + return xfrm_replay_check_esn(x, skb, net_seq); +} + static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) { unsigned int bitnr, nr, i; @@ -479,6 +491,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) static struct xfrm_replay xfrm_replay_legacy = { .advance = xfrm_replay_advance, .check = xfrm_replay_check, + .recheck = xfrm_replay_check, .notify = xfrm_replay_notify, .overflow = xfrm_replay_overflow, }; @@ -486,6 +499,7 @@ static struct xfrm_replay xfrm_replay_legacy = { static struct xfrm_replay xfrm_replay_bmp = { .advance = xfrm_replay_advance_bmp, .check = xfrm_replay_check_bmp, + .recheck = xfrm_replay_check_bmp, .notify = xfrm_replay_notify_bmp, .overflow = xfrm_replay_overflow_bmp, }; @@ -493,6 +507,7 @@ static struct xfrm_replay xfrm_replay_bmp = { static struct xfrm_replay xfrm_replay_esn = { .advance = xfrm_replay_advance_esn, .check = xfrm_replay_check_esn, + .recheck = xfrm_replay_recheck_esn, .notify = xfrm_replay_notify_bmp, .overflow = xfrm_replay_overflow_esn, }; -- cgit v1.2.3 From c0cc88a7627c333de50b07b7c60b1d49d9d2e6cc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Sep 2012 15:54:55 -0400 Subject: l2tp: fix a typo in l2tp_eth_dev_recv() While investigating l2tp bug, I hit a bug in eth_type_trans(), because not enough bytes were pulled in skb head. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/l2tp/l2tp_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index f9ee74deeac2..3bfb34aaee29 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -153,7 +153,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, skb->data, length); } - if (!pskb_may_pull(skb, sizeof(ETH_HLEN))) + if (!pskb_may_pull(skb, ETH_HLEN)) goto error; secpath_reset(skb); -- cgit v1.2.3 From 7ce8c7a3433c6d6f4adfec0611d250782f0b4b0c Mon Sep 17 00:00:00 2001 From: LEO Airwarosu Yoichi Shinoda Date: Mon, 27 Aug 2012 22:28:16 +0900 Subject: mac80211: Various small fixes for cfg.c: mpath_set_pinfo() Various small fixes for net/mac80211/cfg.c:mpath_set_pinfo(): Initialize *pinfo before filling members in, handle MESH_PATH_RESOLVED correctly, and remove bogus assignment; result in correct display of FLAGS values and meaningful EXPTIME for expired paths in iw utility. Signed-off-by: Yoichi Shinoda Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d41974aacf51..a58c0b649ba1 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1378,6 +1378,8 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, else memset(next_hop, 0, ETH_ALEN); + memset(pinfo, 0, sizeof(*pinfo)); + pinfo->generation = mesh_paths_generation; pinfo->filled = MPATH_INFO_FRAME_QLEN | @@ -1396,7 +1398,6 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, pinfo->discovery_timeout = jiffies_to_msecs(mpath->discovery_timeout); pinfo->discovery_retries = mpath->discovery_retries; - pinfo->flags = 0; if (mpath->flags & MESH_PATH_ACTIVE) pinfo->flags |= NL80211_MPATH_FLAG_ACTIVE; if (mpath->flags & MESH_PATH_RESOLVING) @@ -1405,10 +1406,8 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, pinfo->flags |= NL80211_MPATH_FLAG_SN_VALID; if (mpath->flags & MESH_PATH_FIXED) pinfo->flags |= NL80211_MPATH_FLAG_FIXED; - if (mpath->flags & MESH_PATH_RESOLVING) - pinfo->flags |= NL80211_MPATH_FLAG_RESOLVING; - - pinfo->flags = mpath->flags; + if (mpath->flags & MESH_PATH_RESOLVED) + pinfo->flags |= NL80211_MPATH_FLAG_RESOLVED; } static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, -- cgit v1.2.3 From d013ef2aba8fe765ca683598e404203215632373 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 5 Sep 2012 10:53:18 +0000 Subject: tcp: fix possible socket refcount problem for ipv6 commit 144d56e91044181ec0ef67aeca91e9a8b5718348 ("tcp: fix possible socket refcount problem") is missing the IPv6 part. As tcp_release_cb is shared by both protocols we should hold sock reference for the TCP_MTU_REDUCED_DEFERRED bit. Signed-off-by: Julian Anastasov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a3e60cc04a8a..acd32e3f1b68 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -403,8 +403,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, tp->mtu_info = ntohl(info); if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); - else - set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); + else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, + &tp->tsq_flags)) + sock_hold(sk); goto out; } -- cgit v1.2.3 From 0626af3139572610b56376580d11eb65d45d9dd7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Sep 2012 07:49:03 +0000 Subject: netfilter: take care of timewait sockets Sami Farin reported crashes in xt_LOG because it assumes skb->sk is a full blown socket. Since (41063e9 ipv4: Early TCP socket demux), we can have skb->sk pointing to a timewait socket. Same fix is needed in nfnetlink_log. Diagnosed-by: Florian Westphal Reported-by: Sami Farin Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 14 ++++++++------ net/netfilter/xt_LOG.c | 33 +++++++++++++++++---------------- 2 files changed, 25 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 14e2f3903142..5cfb5bedb2b8 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -381,6 +381,7 @@ __build_packet_message(struct nfulnl_instance *inst, struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; sk_buff_data_t old_tail = inst->skb->tail; + struct sock *sk; nlh = nlmsg_put(inst->skb, 0, 0, NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET, @@ -499,18 +500,19 @@ __build_packet_message(struct nfulnl_instance *inst, } /* UID */ - if (skb->sk) { - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) { - struct file *file = skb->sk->sk_socket->file; + sk = skb->sk; + if (sk && sk->sk_state != TCP_TIME_WAIT) { + read_lock_bh(&sk->sk_callback_lock); + if (sk->sk_socket && sk->sk_socket->file) { + struct file *file = sk->sk_socket->file; __be32 uid = htonl(file->f_cred->fsuid); __be32 gid = htonl(file->f_cred->fsgid); - read_unlock_bh(&skb->sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); if (nla_put_be32(inst->skb, NFULA_UID, uid) || nla_put_be32(inst->skb, NFULA_GID, gid)) goto nla_put_failure; } else - read_unlock_bh(&skb->sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } /* local sequence number */ diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index ff5f75fddb15..2a4f9693e799 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -145,6 +145,19 @@ static int dump_tcp_header(struct sbuff *m, const struct sk_buff *skb, return 0; } +static void dump_sk_uid_gid(struct sbuff *m, struct sock *sk) +{ + if (!sk || sk->sk_state == TCP_TIME_WAIT) + return; + + read_lock_bh(&sk->sk_callback_lock); + if (sk->sk_socket && sk->sk_socket->file) + sb_add(m, "UID=%u GID=%u ", + sk->sk_socket->file->f_cred->fsuid, + sk->sk_socket->file->f_cred->fsgid); + read_unlock_bh(&sk->sk_callback_lock); +} + /* One level of recursion won't kill us */ static void dump_ipv4_packet(struct sbuff *m, const struct nf_loginfo *info, @@ -361,14 +374,8 @@ static void dump_ipv4_packet(struct sbuff *m, } /* Max length: 15 "UID=4294967295 " */ - if ((logflags & XT_LOG_UID) && !iphoff && skb->sk) { - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) - sb_add(m, "UID=%u GID=%u ", - skb->sk->sk_socket->file->f_cred->fsuid, - skb->sk->sk_socket->file->f_cred->fsgid); - read_unlock_bh(&skb->sk->sk_callback_lock); - } + if ((logflags & XT_LOG_UID) && !iphoff) + dump_sk_uid_gid(m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ if (!iphoff && skb->mark) @@ -717,14 +724,8 @@ static void dump_ipv6_packet(struct sbuff *m, } /* Max length: 15 "UID=4294967295 " */ - if ((logflags & XT_LOG_UID) && recurse && skb->sk) { - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) - sb_add(m, "UID=%u GID=%u ", - skb->sk->sk_socket->file->f_cred->fsuid, - skb->sk->sk_socket->file->f_cred->fsgid); - read_unlock_bh(&skb->sk->sk_callback_lock); - } + if ((logflags & XT_LOG_UID) && recurse) + dump_sk_uid_gid(m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ if (!recurse && skb->mark) -- cgit v1.2.3 From 979402b16cde048ced4839e21cc49e0779352b80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Sep 2012 23:34:44 +0000 Subject: udp: increment UDP_MIB_INERRORS if copy failed In UDP recvmsg(), we miss an increase of UDP_MIB_INERRORS if the copy of skb to userspace failed for whatever reason. Reported-by: Shawn Bohrer Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 5 +++++ net/ipv6/udp.c | 11 +++++++++++ 2 files changed, 16 insertions(+) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6f6d1aca3c3d..2814f66dac64 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1226,6 +1226,11 @@ try_again: if (unlikely(err)) { trace_kfree_skb(skb, udp_recvmsg); + if (!peeked) { + atomic_inc(&sk->sk_drops); + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); + } goto out_free; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 99d0077b56b8..07e2bfef6845 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -394,6 +394,17 @@ try_again: } if (unlikely(err)) { trace_kfree_skb(skb, udpv6_recvmsg); + if (!peeked) { + atomic_inc(&sk->sk_drops); + if (is_udp4) + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, + is_udplite); + else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, + is_udplite); + } goto out_free; } if (!peeked) { -- cgit v1.2.3 From 6862234238e84648c305526af2edd98badcad1e0 Mon Sep 17 00:00:00 2001 From: Chema Gonzalez Date: Fri, 7 Sep 2012 13:40:50 +0000 Subject: net: small bug on rxhash calculation In the current rxhash calculation function, while the sorting of the ports/addrs is coherent (you get the same rxhash for packets sharing the same 4-tuple, in both directions), ports and addrs are sorted independently. This implies packets from a connection between the same addresses but crossed ports hash to the same rxhash. For example, traffic between A=S:l and B=L:s is hashed (in both directions) from {L, S, {s, l}}. The same rxhash is obtained for packets between C=S:s and D=L:l. This patch ensures that you either swap both addrs and ports, or you swap none. Traffic between A and B, and traffic between C and D, get their rxhash from different sources ({L, S, {l, s}} for A<->B, and {L, S, {s, l}} for C<->D) The patch is co-written with Eric Dumazet Signed-off-by: Chema Gonzalez Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 83988362805e..d7fe32c946c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2647,15 +2647,16 @@ void __skb_get_rxhash(struct sk_buff *skb) if (!skb_flow_dissect(skb, &keys)) return; - if (keys.ports) { - if ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]) - swap(keys.port16[0], keys.port16[1]); + if (keys.ports) skb->l4_rxhash = 1; - } /* get a consistent hash (same value on both flow directions) */ - if ((__force u32)keys.dst < (__force u32)keys.src) + if (((__force u32)keys.dst < (__force u32)keys.src) || + (((__force u32)keys.dst == (__force u32)keys.src) && + ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) { swap(keys.dst, keys.src); + swap(keys.port16[0], keys.port16[1]); + } hash = jhash_3words((__force u32)keys.dst, (__force u32)keys.src, -- cgit v1.2.3 From 64f509ce71b08d037998e93dd51180c19b2f464c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 31 Aug 2012 09:55:53 +0000 Subject: netfilter: Mark SYN/ACK packets as invalid from original direction Clients should not send such packets. By accepting them, we open up a hole by wich ephemeral ports can be discovered in an off-path attack. See: "Reflection scan: an Off-Path Attack on TCP" by Jan Wrobel, http://arxiv.org/abs/1201.2074 Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index a5ac11ebef33..aba98f942979 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -158,21 +158,18 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sCL -> sSS */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ -/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, +/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR }, /* * sNO -> sIV Too late and no reason to do anything * sSS -> sIV Client can't send SYN and then SYN/ACK * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open - * sSR -> sIG - * sES -> sIG Error: SYNs in window outside the SYN_SENT state - * are errors. Receiver will reply with RST - * and close the connection. - * Or we are not in sync and hold a dead connection. - * sFW -> sIG - * sCW -> sIG - * sLA -> sIG - * sTW -> sIG - * sCL -> sIG + * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open + * sES -> sIV Invalid SYN/ACK packets sent by the client + * sFW -> sIV + * sCW -> sIV + * sLA -> sIV + * sTW -> sIV + * sCL -> sIV */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, -- cgit v1.2.3 From 4a70bbfaef0361d27272629d1a250a937edcafe4 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 31 Aug 2012 09:55:54 +0000 Subject: netfilter: Validate the sequence number of dataless ACK packets as well We spare nothing by not validating the sequence number of dataless ACK packets and enabling it makes harder off-path attacks. See: "Reflection scan: an Off-Path Attack on TCP" by Jan Wrobel, http://arxiv.org/abs/1201.2074 Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index aba98f942979..e046b3756aab 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -630,15 +630,9 @@ static bool tcp_in_window(const struct nf_conn *ct, ack = sack = receiver->td_end; } - if (seq == end - && (!tcph->rst - || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT))) + if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT) /* - * Packets contains no data: we assume it is valid - * and check the ack value only. - * However RST segments are always validated by their - * SEQ number, except when seq == 0 (reset sent answering - * SYN. + * RST sent answering SYN. */ seq = end = sender->td_end; -- cgit v1.2.3 From 566f26aa705609d05940289036ab914c8a3be707 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 9 Sep 2012 18:38:27 +0000 Subject: caif: move the dereference below the NULL test The dereference should be moved below the NULL test. spatch with a semantic match is used to found this. (http://coccinelle.lip6.fr/) Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/caif/cfsrvl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index dd485f6128e8..ba217e90765e 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -211,9 +211,10 @@ void caif_client_register_refcnt(struct cflayer *adapt_layer, void (*put)(struct cflayer *lyr)) { struct cfsrvl *service; - service = container_of(adapt_layer->dn, struct cfsrvl, layer); - WARN_ON(adapt_layer == NULL || adapt_layer->dn == NULL); + if (WARN_ON(adapt_layer == NULL || adapt_layer->dn == NULL)) + return; + service = container_of(adapt_layer->dn, struct cfsrvl, layer); service->hold = hold; service->put = put; } -- cgit v1.2.3 From 1c463e57b32e3b6a3250fe75d1d56cb598d664e6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 10 Sep 2012 09:13:07 -0700 Subject: net: fix net/core/sock.c build error Fix net/core/sock.c build error when CONFIG_INET is not enabled: net/built-in.o: In function `sock_edemux': (.text+0xd396): undefined reference to `inet_twsk_put' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- net/core/sock.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 7f64467535d1..305792076121 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1525,9 +1525,11 @@ void sock_edemux(struct sk_buff *skb) { struct sock *sk = skb->sk; +#ifdef CONFIG_INET if (sk->sk_state == TCP_TIME_WAIT) inet_twsk_put(inet_twsk(sk)); else +#endif sock_put(sk); } EXPORT_SYMBOL(sock_edemux); -- cgit v1.2.3 From bdfc87f7d1e253e0a61e2fc6a75ea9d76f7fc03a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Sep 2012 13:11:12 +0000 Subject: net-sched: sch_cbq: avoid infinite loop Its possible to setup a bad cbq configuration leading to an infinite loop in cbq_classify() DEV_OUT=eth0 ICMP="match ip protocol 1 0xff" U32="protocol ip u32" DST="match ip dst" tc qdisc add dev $DEV_OUT root handle 1: cbq avpkt 1000 \ bandwidth 100mbit tc class add dev $DEV_OUT parent 1: classid 1:1 cbq \ rate 512kbit allot 1500 prio 5 bounded isolated tc filter add dev $DEV_OUT parent 1: prio 3 $U32 \ $ICMP $DST 192.168.3.234 flowid 1: Reported-by: Denys Fedoryschenko Tested-by: Denys Fedoryschenko Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_cbq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 6aabd77d1cfd..564b9fc8efd3 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -250,10 +250,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL) cl = defmap[TC_PRIO_BESTEFFORT]; - if (cl == NULL || cl->level >= head->level) + if (cl == NULL) goto fallback; } - + if (cl->level >= head->level) + goto fallback; #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: -- cgit v1.2.3 From 16af511a666827eaf5802144f09e2fb7b0942c99 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 12 Sep 2012 02:04:53 +0000 Subject: netfilter: log: Fix log-level processing auto75914331@hushmail.com reports that iptables does not correctly output the KERN_. $IPTABLES -A RULE_0_in -j LOG --log-level notice --log-prefix "DENY in: " result with linux 3.6-rc5 Sep 12 06:37:29 xxxxx kernel: <5>DENY in: IN=eth0 OUT= MAC=....... result with linux 3.5.3 and older: Sep 9 10:43:01 xxxxx kernel: DENY in: IN=eth0 OUT= MAC...... commit 04d2c8c83d0 ("printk: convert the format for KERN_ to a 2 byte pattern") updated the syslog header style but did not update netfilter uses. Do so. Use KERN_SOH and string concatenation instead of "%c" KERN_SOH_ASCII as suggested by Eric Dumazet. Signed-off-by: Joe Perches cc: auto75914331@hushmail.com Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_log.c | 2 +- net/netfilter/xt_LOG.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index f88ee537fb2b..92de5e5f9db2 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -80,7 +80,7 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum, unsigned int bitmask; spin_lock_bh(&ebt_log_lock); - printk("<%c>%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x", + printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x", '0' + loginfo->u.log.level, prefix, in ? in->name : "", out ? out->name : "", eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index 2a4f9693e799..91e9af4d1f42 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -443,8 +443,8 @@ log_packet_common(struct sbuff *m, const struct nf_loginfo *loginfo, const char *prefix) { - sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, - prefix, + sb_add(m, KERN_SOH "%c%sIN=%s OUT=%s ", + '0' + loginfo->u.log.level, prefix, in ? in->name : "", out ? out->name : ""); #ifdef CONFIG_BRIDGE_NETFILTER -- cgit v1.2.3 From e29fe837bfa3ca0a9f4ef1d4a90e6e0a74b6b8a0 Mon Sep 17 00:00:00 2001 From: David Ward Date: Thu, 13 Sep 2012 05:22:32 +0000 Subject: net_sched: gred: correct comment about qavg calculation in RIO mode Signed-off-by: David Ward Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_gred.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index e901583e4ea5..fca73cdf44d9 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -176,7 +176,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch) skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp; } - /* sum up all the qaves of prios <= to ours to get the new qave */ + /* sum up all the qaves of prios < ours to get the new qave */ if (!gred_wred_mode(t) && gred_rio_mode(t)) { int i; -- cgit v1.2.3 From c22e464022f935b0cbd8724b1d99d800d49518a9 Mon Sep 17 00:00:00 2001 From: David Ward Date: Thu, 13 Sep 2012 05:22:33 +0000 Subject: net_sched: gred: eliminate redundant DP prio comparisons Each pair of DPs only needs to be compared once when searching for a non-unique prio value. Signed-off-by: David Ward Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_gred.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index fca73cdf44d9..e19d4ebfea1c 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -102,9 +102,8 @@ static inline int gred_wred_mode_check(struct Qdisc *sch) if (q == NULL) continue; - for (n = 0; n < table->DPs; n++) - if (table->tab[n] && table->tab[n] != q && - table->tab[n]->prio == q->prio) + for (n = i + 1; n < table->DPs; n++) + if (table->tab[n] && table->tab[n]->prio == q->prio) return 1; } -- cgit v1.2.3 From 1fe37b106b039d9358fd1211c39b1fa199e547a8 Mon Sep 17 00:00:00 2001 From: David Ward Date: Thu, 13 Sep 2012 05:22:34 +0000 Subject: net_sched: gred: fix qave reporting via netlink q->vars.qavg is a Wlog scaled value, but q->backlog is not. In order to pass q->vars.qavg as the backlog value, we need to un-scale it. Additionally, the qave value returned via netlink should not be Wlog scaled, so we need to un-scale the result of red_calc_qavg(). This caused artificially high values for "Average Queue" to be shown by 'tc -s -d qdisc', but did not affect the actual operation of GRED. Signed-off-by: David Ward Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_gred.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index e19d4ebfea1c..b2570b59d85e 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -534,6 +534,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) for (i = 0; i < MAX_DPs; i++) { struct gred_sched_data *q = table->tab[i]; struct tc_gred_qopt opt; + unsigned long qavg; memset(&opt, 0, sizeof(opt)); @@ -565,7 +566,9 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) if (gred_wred_mode(table)) gred_load_wred_set(table, q); - opt.qave = red_calc_qavg(&q->parms, &q->vars, q->vars.qavg); + qavg = red_calc_qavg(&q->parms, &q->vars, + q->vars.qavg >> q->parms.Wlog); + opt.qave = qavg >> q->parms.Wlog; append_opt: if (nla_append(skb, sizeof(opt), &opt) < 0) -- cgit v1.2.3 From ba1bf474eae07a128fae6252bf7d68eaef0eacf8 Mon Sep 17 00:00:00 2001 From: David Ward Date: Thu, 13 Sep 2012 05:22:35 +0000 Subject: net_sched: gred: actually perform idling in WRED mode gred_dequeue() and gred_drop() do not seem to get called when the queue is empty, meaning that we never start idling while in WRED mode. And since qidlestart is not stored by gred_store_wred_set(), we would never stop idling while in WRED mode if we ever started. This messes up the average queue size calculation that influences packet marking/dropping behavior. Now, we start WRED mode idling as we are removing the last packet from the queue. Also we now actually stop WRED mode idling when we are enqueuing a packet. Cc: Bruce Osler Signed-off-by: David Ward Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/sch_gred.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index b2570b59d85e..d42234c0f13b 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -136,6 +136,7 @@ static inline void gred_store_wred_set(struct gred_sched *table, struct gred_sched_data *q) { table->wred_set.qavg = q->vars.qavg; + table->wred_set.qidlestart = q->vars.qidlestart; } static inline int gred_use_ecn(struct gred_sched *t) @@ -259,16 +260,18 @@ static struct sk_buff *gred_dequeue(struct Qdisc *sch) } else { q->backlog -= qdisc_pkt_len(skb); - if (!q->backlog && !gred_wred_mode(t)) - red_start_of_idle_period(&q->vars); + if (gred_wred_mode(t)) { + if (!sch->qstats.backlog) + red_start_of_idle_period(&t->wred_set); + } else { + if (!q->backlog) + red_start_of_idle_period(&q->vars); + } } return skb; } - if (gred_wred_mode(t) && !red_is_idling(&t->wred_set)) - red_start_of_idle_period(&t->wred_set); - return NULL; } @@ -290,19 +293,20 @@ static unsigned int gred_drop(struct Qdisc *sch) q->backlog -= len; q->stats.other++; - if (!q->backlog && !gred_wred_mode(t)) - red_start_of_idle_period(&q->vars); + if (gred_wred_mode(t)) { + if (!sch->qstats.backlog) + red_start_of_idle_period(&t->wred_set); + } else { + if (!q->backlog) + red_start_of_idle_period(&q->vars); + } } qdisc_drop(skb, sch); return len; } - if (gred_wred_mode(t) && !red_is_idling(&t->wred_set)) - red_start_of_idle_period(&t->wred_set); - return 0; - } static void gred_reset(struct Qdisc *sch) -- cgit v1.2.3 From 6af773e786ad617b0264ebe06ba60675c01f3e51 Mon Sep 17 00:00:00 2001 From: Nishank Trivedi Date: Wed, 12 Sep 2012 13:32:49 +0000 Subject: pktgen: fix crash with vlan and packet size less than 46 If vlan option is being specified in the pktgen and packet size being requested is less than 46 bytes, despite being illogical request, pktgen should not crash the kernel. BUG: unable to handle kernel paging request at ffff88021fb82000 Process kpktgend_0 (pid: 1184, threadinfo ffff880215f1a000, task ffff880218544530) Call Trace: [] ? pktgen_finalize_skb+0x222/0x300 [pktgen] [] ? build_skb+0x34/0x1c0 [] pktgen_thread_worker+0x5d1/0x1790 [pktgen] [] ? igb_xmit_frame_ring+0xa30/0xa30 [igb] [] ? wake_up_bit+0x40/0x40 [] ? wake_up_bit+0x40/0x40 [] ? spin+0x240/0x240 [pktgen] [] kthread+0x93/0xa0 [] kernel_thread_helper+0x4/0x10 [] ? flush_kthread_worker+0x80/0x80 [] ? gs_change+0x13/0x13 The root cause of why pktgen is not able to handle this case is due to comparison of signed (datalen) and unsigned data (sizeof), which eventually passes a huge number to skb_put(). Signed-off-by: Nishank Trivedi Signed-off-by: David S. Miller --- net/core/pktgen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index cce9e53528b1..148e73d2c451 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2721,7 +2721,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 - pkt_dev->pkt_overhead; - if (datalen < sizeof(struct pktgen_hdr)) + if (datalen < 0 || datalen < sizeof(struct pktgen_hdr)) datalen = sizeof(struct pktgen_hdr); udph->source = htons(pkt_dev->cur_udp_src); -- cgit v1.2.3