From 07bf7908950a8b14e81aa1807e3c667eab39287a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 1 Aug 2018 13:45:11 +0200 Subject: xfrm: Validate address prefix lengths in the xfrm selector. We don't validate the address prefix lengths in the xfrm selector we got from userspace. This can lead to undefined behaviour in the address matching functions if the prefix is too big for the given address family. Fix this by checking the prefixes and refuse SA/policy insertation when a prefix is invalid. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Air Icy Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 33878e6e0d0a..5151b3ebf068 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -151,10 +151,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->family) { case AF_INET: + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + goto out; + break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + goto out; + break; #else err = -EAFNOSUPPORT; @@ -1359,10 +1365,16 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) switch (p->sel.family) { case AF_INET: + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + return -EINVAL; + break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + return -EINVAL; + break; #else return -EAFNOSUPPORT; -- cgit v1.2.3 From 215ab0f021c9fea3c18b75e7d522400ee6a49990 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 31 Aug 2018 08:38:49 -0300 Subject: xfrm6: call kfree_skb when skb is toobig After commit d6990976af7c5d8f55903bfb4289b6fb030bf754 ("vti6: fix PMTU caching and reporting on xmit"), some too big skbs might be potentially passed down to __xfrm6_output, causing it to fail to transmit but not free the skb, causing a leak of skb, and consequentially a leak of dst references. After running pmtu.sh, that shows as failure to unregister devices in a namespace: [ 311.397671] unregister_netdevice: waiting for veth_b to become free. Usage count = 1 The fix is to call kfree_skb in case of transmit failures. Fixes: dd767856a36e ("xfrm6: Don't call icmpv6_send on local error") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_output.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 5959ce9620eb..6a74080005cf 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -170,9 +170,11 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (toobig && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } else if (!skb->ignore_df && toobig && skb->sk) { xfrm_local_error(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } -- cgit v1.2.3 From bfc0698bebcb16d19ecfc89574ad4d696955e5d3 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 3 Sep 2018 04:36:52 -0700 Subject: xfrm: reset transport header back to network header after all input transforms ahave been applied A policy may have been set up with multiple transforms (e.g., ESP and ipcomp). In this situation, the ingress IPsec processing iterates in xfrm_input() and applies each transform in turn, processing the nexthdr to find any additional xfrm that may apply. This patch resets the transport header back to network header only after the last transformation so that subsequent xfrms can find the correct transport header. Fixes: 7785bba299a8 ("esp: Add a software GRO codepath") Suggested-by: Steffen Klassert Signed-off-by: Sowmini Varadhan Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_input.c | 1 + net/ipv4/xfrm4_mode_transport.c | 4 +--- net/ipv6/xfrm6_input.c | 1 + net/ipv6/xfrm6_mode_transport.c | 4 +--- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index bcfc00e88756..f8de2482a529 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -67,6 +67,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index 3d36644890bb..1ad2c2c4e250 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -46,7 +46,6 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); - struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -54,8 +53,7 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) skb->network_header = skb->transport_header; } ip_hdr(skb)->tot_len = htons(skb->len + ihl); - if (!xo || !(xo->flags & XFRM_GRO)) - skb_reset_transport_header(skb); + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 841f4a07438e..9ef490dddcea 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -59,6 +59,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); + skb_reset_transport_header(skb); return -1; } diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index 9ad07a91708e..3c29da5defe6 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -51,7 +51,6 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); - struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -60,8 +59,7 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) } ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - sizeof(struct ipv6hdr)); - if (!xo || !(xo->flags & XFRM_GRO)) - skb_reset_transport_header(skb); + skb_reset_transport_header(skb); return 0; } -- cgit v1.2.3 From 782710e333a526780d65918d669cb96646983ba2 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 3 Sep 2018 04:36:53 -0700 Subject: xfrm: reset crypto_done when iterating over multiple input xfrms We only support one offloaded xfrm (we do not have devices that can handle more than one offload), so reset crypto_done in xfrm_input() when iterating over multiple transforms in xfrm_input, so that we can invoke the appropriate x->type->input for the non-offloaded transforms Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Signed-off-by: Sowmini Varadhan Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 352abca2605f..86f5afbd0a0c 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -453,6 +453,7 @@ resume: XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; } + crypto_done = false; } while (!err); err = xfrm_rcv_cb(skb, family, x->type->proto, 0); -- cgit v1.2.3 From 8682250b3c1b75a45feb7452bc413d004cfe3778 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 5 Sep 2018 08:06:13 +0300 Subject: mac80211: Always report TX status If a frame is dropped for any reason, mac80211 wouldn't report the TX status back to user space. As the user space may rely on the TX_STATUS to kick its state machines, resends etc, it's better to just report this frame as not acked instead. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/status.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 9a6d7208bf4f..001a869c059c 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -479,11 +479,6 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, if (!skb) return; - if (dropped) { - dev_kfree_skb_any(skb); - return; - } - if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie; struct ieee80211_sub_if_data *sdata; @@ -506,6 +501,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, } rcu_read_unlock(); + dev_kfree_skb_any(skb); + } else if (dropped) { dev_kfree_skb_any(skb); } else { /* consumes skb */ -- cgit v1.2.3 From 94a5b3acd0aef83c0e38b5117eda7b2abf4a05a4 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 5 Sep 2018 08:06:14 +0300 Subject: mac80211: Don't wake up from PS for offchannel TX Otherwise the offchannel frame might be queued due to IEEE80211_QUEUE_STOP_REASON_PS and later dropped (in ieee80211_tx_frags()). Anyway, it doesn't make much sense to wake up the device during ROC. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f353d9db54bc..131542513c8f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -214,6 +214,7 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) { struct ieee80211_local *local = tx->local; struct ieee80211_if_managed *ifmgd; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); /* driver doesn't support power save */ if (!ieee80211_hw_check(&local->hw, SUPPORTS_PS)) @@ -242,6 +243,9 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) if (tx->sdata->vif.type != NL80211_IFTYPE_STATION) return TX_CONTINUE; + if (unlikely(info->flags & IEEE80211_TX_INTFL_OFFCHAN_TX_OK)) + return TX_CONTINUE; + ifmgd = &tx->sdata->u.mgd; /* -- cgit v1.2.3 From 24f33e64fcd0d50a4b1a8e5b41bd0257aa66b0e8 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 5 Sep 2018 08:06:12 +0300 Subject: cfg80211: reg: Init wiphy_idx in regulatory_hint_core() Core regulatory hints didn't set wiphy_idx to WIPHY_IDX_INVALID. Since the regulatory request is zeroed, wiphy_idx was always implicitly set to 0. This resulted in updating only phy #0. Fix that. Fixes: 806a9e39670b ("cfg80211: make regulatory_request use wiphy_idx instead of wiphy") Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho [add fixes tag] Signed-off-by: Johannes Berg --- net/wireless/reg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2f702adf2912..765dedb12361 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2867,6 +2867,7 @@ static int regulatory_hint_core(const char *alpha2) request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_CORE; + request->wiphy_idx = WIPHY_IDX_INVALID; queue_regulatory_request(request); -- cgit v1.2.3 From 6eae4a6c2be387fec41b0d2782c4fffb57159498 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Wed, 5 Sep 2018 06:22:59 -0400 Subject: mac80211: fix pending queue hang due to TX_DROP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In our environment running lots of mesh nodes, we are seeing the pending queue hang periodically, with the debugfs queues file showing lines such as: 00: 0x00000000/348 i.e. there are a large number of frames but no stop reason set. One way this could happen is if queue processing from the pending tasklet exited early without processing all frames, and without having some future event (incoming frame, stop reason flag, ...) to reschedule it. Exactly this can occur today if ieee80211_tx() returns false due to packet drops or power-save buffering in the tx handlers. In the past, this function would return true in such cases, and the change to false doesn't seem to be intentional. Fix this case by reverting to the previous behavior. Fixes: bb42f2d13ffc ("mac80211: Move reorder-sensitive TX handlers to after TXQ dequeue") Signed-off-by: Bob Copeland Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 131542513c8f..25ba24bef8f5 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1894,7 +1894,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; if (invoke_tx_handlers_early(&tx)) - return false; + return true; if (ieee80211_queue_skb(local, sdata, tx.sta, tx.skb)) return true; -- cgit v1.2.3 From 119f94a6fefcc76d47075b83d2b73d04c895df78 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 5 Sep 2018 18:52:22 +0300 Subject: cfg80211: Address some corner cases in scan result channel updating cfg80211_get_bss_channel() is used to update the RX channel based on the available frame payload information (channel number from DSSS Parameter Set element or HT Operation element). This is needed on 2.4 GHz channels where frames may be received on neighboring channels due to overlapping frequency range. This might of some use on the 5 GHz band in some corner cases, but things are more complex there since there is no n:1 or 1:n mapping between channel numbers and frequencies due to multiple different starting frequencies in different operating classes. This could result in ieee80211_channel_to_frequency() returning incorrect frequency and ieee80211_get_channel() returning incorrect channel information (or indication of no match). In the previous implementation, this could result in some scan results being dropped completely, e.g., for the 4.9 GHz channels. That prevented connection to such BSSs. Fix this by using the driver-provided channel pointer if ieee80211_get_channel() does not find matching channel data for the channel number in the frame payload and if the scan is done with 5 MHz or 10 MHz channel bandwidth. While doing this, also add comments describing what the function is trying to achieve to make it easier to understand what happens here and why. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/wireless/scan.c | 58 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d36c3eb7b931..d0e7472dd9fd 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1058,13 +1058,23 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, return NULL; } +/* + * Update RX channel information based on the available frame payload + * information. This is mainly for the 2.4 GHz band where frames can be received + * from neighboring channels and the Beacon frames use the DSSS Parameter Set + * element to indicate the current (transmitting) channel, but this might also + * be needed on other bands if RX frequency does not match with the actual + * operating channel of a BSS. + */ static struct ieee80211_channel * cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, - struct ieee80211_channel *channel) + struct ieee80211_channel *channel, + enum nl80211_bss_scan_width scan_width) { const u8 *tmp; u32 freq; int channel_number = -1; + struct ieee80211_channel *alt_channel; tmp = cfg80211_find_ie(WLAN_EID_DS_PARAMS, ie, ielen); if (tmp && tmp[1] == 1) { @@ -1078,16 +1088,45 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, } } - if (channel_number < 0) + if (channel_number < 0) { + /* No channel information in frame payload */ return channel; + } freq = ieee80211_channel_to_frequency(channel_number, channel->band); - channel = ieee80211_get_channel(wiphy, freq); - if (!channel) - return NULL; - if (channel->flags & IEEE80211_CHAN_DISABLED) + alt_channel = ieee80211_get_channel(wiphy, freq); + if (!alt_channel) { + if (channel->band == NL80211_BAND_2GHZ) { + /* + * Better not allow unexpected channels when that could + * be going beyond the 1-11 range (e.g., discovering + * BSS on channel 12 when radio is configured for + * channel 11. + */ + return NULL; + } + + /* No match for the payload channel number - ignore it */ + return channel; + } + + if (scan_width == NL80211_BSS_CHAN_WIDTH_10 || + scan_width == NL80211_BSS_CHAN_WIDTH_5) { + /* + * Ignore channel number in 5 and 10 MHz channels where there + * may not be an n:1 or 1:n mapping between frequencies and + * channel numbers. + */ + return channel; + } + + /* + * Use the channel determined through the payload channel number + * instead of the RX channel reported by the driver. + */ + if (alt_channel->flags & IEEE80211_CHAN_DISABLED) return NULL; - return channel; + return alt_channel; } /* Returned bss is reference counted and must be cleaned up appropriately. */ @@ -1112,7 +1151,8 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, (data->signal < 0 || data->signal > 100))) return NULL; - channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan); + channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan, + data->scan_width); if (!channel) return NULL; @@ -1210,7 +1250,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, return NULL; channel = cfg80211_get_bss_channel(wiphy, mgmt->u.beacon.variable, - ielen, data->chan); + ielen, data->chan, data->scan_width); if (!channel) return NULL; -- cgit v1.2.3 From cb59bc14e830028d2244861216df038165d7625d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Sep 2018 13:34:02 +0200 Subject: mac80211: TDLS: fix skb queue/priority assignment If the TDLS setup happens over a connection to an AP that doesn't have QoS, we nevertheless assign a non-zero TID (skb->priority) and queue mapping, which may confuse us or drivers later. Fix it by just assigning the special skb->priority and then using ieee80211_select_queue() just like other data frames would go through. Signed-off-by: Johannes Berg --- net/mac80211/tdls.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 5cd5e6e5834e..6c647f425e05 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -16,6 +16,7 @@ #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" +#include "wme.h" /* give usermode some time for retries in setting up the TDLS session */ #define TDLS_PEER_SETUP_TIMEOUT (15 * HZ) @@ -1010,14 +1011,13 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev, switch (action_code) { case WLAN_TDLS_SETUP_REQUEST: case WLAN_TDLS_SETUP_RESPONSE: - skb_set_queue_mapping(skb, IEEE80211_AC_BK); - skb->priority = 2; + skb->priority = 256 + 2; break; default: - skb_set_queue_mapping(skb, IEEE80211_AC_VI); - skb->priority = 5; + skb->priority = 256 + 5; break; } + skb_set_queue_mapping(skb, ieee80211_select_queue(sdata, skb)); /* * Set the WLAN_TDLS_TEARDOWN flag to indicate a teardown in progress. -- cgit v1.2.3 From c42055105785580563535e6d3143cad95c7ac7ee Mon Sep 17 00:00:00 2001 From: Yuan-Chi Pang Date: Thu, 6 Sep 2018 16:57:48 +0800 Subject: mac80211: fix TX status reporting for ieee80211s TX status reporting to ieee80211s is through ieee80211s_update_metric. There are two problems about ieee80211s_update_metric: 1. The purpose is to estimate the fail probability to a specific link. No need to restrict to data frame. 2. Current implementation does not work if wireless driver does not pass tx_status with skb. Fix this by removing ieee80211_is_data condition, passing ieee80211_tx_status directly to ieee80211s_update_metric, and putting it in both __ieee80211_tx_status and ieee80211_tx_status_ext. Signed-off-by: Yuan-Chi Pang Signed-off-by: Johannes Berg --- net/mac80211/mesh.h | 3 ++- net/mac80211/mesh_hwmp.c | 9 +++------ net/mac80211/status.c | 4 +++- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index ee56f18cad3f..21526630bf65 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -217,7 +217,8 @@ void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); void ieee80211s_update_metric(struct ieee80211_local *local, - struct sta_info *sta, struct sk_buff *skb); + struct sta_info *sta, + struct ieee80211_tx_status *st); void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata); void ieee80211_mesh_teardown_sdata(struct ieee80211_sub_if_data *sdata); int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index daf9db3c8f24..6950cd0bf594 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -295,15 +295,12 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, } void ieee80211s_update_metric(struct ieee80211_local *local, - struct sta_info *sta, struct sk_buff *skb) + struct sta_info *sta, + struct ieee80211_tx_status *st) { - struct ieee80211_tx_info *txinfo = IEEE80211_SKB_CB(skb); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *txinfo = st->info; int failed; - if (!ieee80211_is_data(hdr->frame_control)) - return; - failed = !(txinfo->flags & IEEE80211_TX_STAT_ACK); /* moving average, scaled to 100. diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 001a869c059c..91d7c0cd1882 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -808,7 +808,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, rate_control_tx_status(local, sband, status); if (ieee80211_vif_is_mesh(&sta->sdata->vif)) - ieee80211s_update_metric(local, sta, skb); + ieee80211s_update_metric(local, sta, status); if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && acked) ieee80211_frame_acked(sta, skb); @@ -969,6 +969,8 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw, } rate_control_tx_status(local, sband, status); + if (ieee80211_vif_is_mesh(&sta->sdata->vif)) + ieee80211s_update_metric(local, sta, status); } if (acked || noack_success) { -- cgit v1.2.3 From 9e1437937807b0122e8da1ca8765be2adca9aee6 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 11 Sep 2018 10:31:15 +0200 Subject: xfrm: Fix NULL pointer dereference when skb_dst_force clears the dst_entry. Since commit 222d7dbd258d ("net: prevent dst uses after free") skb_dst_force() might clear the dst_entry attached to the skb. The xfrm code don't expect this to happen, so we crash with a NULL pointer dereference in this case. Fix it by checking skb_dst(skb) for NULL after skb_dst_force() and drop the packet in cast the dst_entry was cleared. Fixes: 222d7dbd258d ("net: prevent dst uses after free") Reported-by: Tobias Hommel Reported-by: Kristian Evensen Reported-by: Wolfgang Walter Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_output.c | 4 ++++ net/xfrm/xfrm_policy.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 89b178a78dc7..36d15a38ce5e 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -101,6 +101,10 @@ static int xfrm_output_one(struct sk_buff *skb, int err) spin_unlock_bh(&x->lock); skb_dst_force(skb); + if (!skb_dst(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); + goto error_nolock; + } if (xfrm_offload(skb)) { x->type_offload->encap(x, skb); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7c5e8978aeaa..626e0f4d1749 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2548,6 +2548,10 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) } skb_dst_force(skb); + if (!skb_dst(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); + return 0; + } dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) { -- cgit v1.2.3 From 3341ba9f0f2637a51738ef5de5114e13a1cd3eca Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 3 Sep 2018 12:26:21 +0200 Subject: mt76x0: fix remove_interface We wrongly use wcid_mask instead of vif_mask. This creates problems when the interface is removed more than 8 times, for example with iw: iw dev interface add type iw dev del This caused 'ifconfig up' to fail with error: SIOCSIFFLAGS: No space left on device Fixes: 95e444098a7b ("mt76x0: main file") Reported-and-tested-by: Sid Hayn Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt76x0/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/main.c b/drivers/net/wireless/mediatek/mt76/mt76x0/main.c index cf6ffb1ba4a2..22bc9d368728 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/main.c @@ -77,9 +77,8 @@ static void mt76x0_remove_interface(struct ieee80211_hw *hw, { struct mt76x0_dev *dev = hw->priv; struct mt76_vif *mvif = (struct mt76_vif *) vif->drv_priv; - unsigned int wcid = mvif->group_wcid.idx; - dev->wcid_mask[wcid / BITS_PER_LONG] &= ~BIT(wcid % BITS_PER_LONG); + dev->vif_mask &= ~BIT(mvif->idx); } static int mt76x0_config(struct ieee80211_hw *hw, u32 changed) -- cgit v1.2.3 From 32bf94fb5c2ec4ec842152d0e5937cd4bb6738fa Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Wed, 19 Sep 2018 13:54:56 -0600 Subject: xfrm: validate template mode XFRM mode parameters passed as part of the user templates in the IP_XFRM_POLICY are never properly validated. Passing values other than valid XFRM modes can cause stack-out-of-bounds reads to occur later in the XFRM processing: [ 140.535608] ================================================================ [ 140.543058] BUG: KASAN: stack-out-of-bounds in xfrm_state_find+0x17e4/0x1cc4 [ 140.550306] Read of size 4 at addr ffffffc0238a7a58 by task repro/5148 [ 140.557369] [ 140.558927] Call trace: [ 140.558936] dump_backtrace+0x0/0x388 [ 140.558940] show_stack+0x24/0x30 [ 140.558946] __dump_stack+0x24/0x2c [ 140.558949] dump_stack+0x8c/0xd0 [ 140.558956] print_address_description+0x74/0x234 [ 140.558960] kasan_report+0x240/0x264 [ 140.558963] __asan_report_load4_noabort+0x2c/0x38 [ 140.558967] xfrm_state_find+0x17e4/0x1cc4 [ 140.558971] xfrm_resolve_and_create_bundle+0x40c/0x1fb8 [ 140.558975] xfrm_lookup+0x238/0x1444 [ 140.558977] xfrm_lookup_route+0x48/0x11c [ 140.558984] ip_route_output_flow+0x88/0xc4 [ 140.558991] raw_sendmsg+0xa74/0x266c [ 140.558996] inet_sendmsg+0x258/0x3b0 [ 140.559002] sock_sendmsg+0xbc/0xec [ 140.559005] SyS_sendto+0x3a8/0x5a8 [ 140.559008] el0_svc_naked+0x34/0x38 [ 140.559009] [ 140.592245] page dumped because: kasan: bad access detected [ 140.597981] page_owner info is not active (free page?) [ 140.603267] [ 140.653503] ================================================================ Signed-off-by: Sean Tranchetti Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5151b3ebf068..d0672c400c2f 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1455,6 +1455,9 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) (ut[i].family != prev_family)) return -EINVAL; + if (ut[i].mode >= XFRM_MODE_MAX) + return -EINVAL; + prev_family = ut[i].family; switch (ut[i].family) { -- cgit v1.2.3 From 2823c8716c687d6c7e261a3a02b3cab43809fe9c Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 27 Aug 2018 10:34:07 -0500 Subject: b43: fix DMA error related regression with proprietary firmware In commit 66cffd6daab7 ("b43: fix transmit failure when VT is switched"), a condition is noted where the network controller needs to be reset. Note that this situation happens when running the open-source firmware (http://netweb.ing.unibs.it/~openfwwf/), plus a number of other special conditions. for a different card model, it is reported that this change breaks operation running the proprietary firmware (https://marc.info/?l=linux-wireless&m=153504546924558&w=2). Rather than reverting the previous patch, the code is tweaked to avoid the reset unless the open-source firmware is being used. Fixes: 66cffd6daab7 ("b43: fix transmit failure when VT is switched") Cc: Stable # 4.18+ Cc: Taketo Kabe Reported-and-tested-by: D. Prabhu Signed-off-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/b43/dma.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/b43/dma.c b/drivers/net/wireless/broadcom/b43/dma.c index 6b0e1ec346cb..d46d57b989ae 100644 --- a/drivers/net/wireless/broadcom/b43/dma.c +++ b/drivers/net/wireless/broadcom/b43/dma.c @@ -1518,13 +1518,15 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev, } } else { /* More than a single header/data pair were missed. - * Report this error, and reset the controller to + * Report this error. If running with open-source + * firmware, then reset the controller to * revive operation. */ b43dbg(dev->wl, "Out of order TX status report on DMA ring %d. Expected %d, but got %d\n", ring->index, firstused, slot); - b43_controller_restart(dev, "Out of order TX"); + if (dev->fw.opensource) + b43_controller_restart(dev, "Out of order TX"); return; } } -- cgit v1.2.3 From a173f066c7cfc031acb8f541708041e009fc9812 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 17 Sep 2018 08:20:36 -0700 Subject: netfilter: bridge: Don't sabotage nf_hook calls from an l3mdev For starters, the bridge netfilter code registers operations that are invoked any time nh_hook is called. Specifically, ip_sabotage_in watches for nested calls for NF_INET_PRE_ROUTING when a bridge is in the stack. Packet wise, the bridge netfilter hook runs first. br_nf_pre_routing allocates nf_bridge, sets in_prerouting to 1 and calls NF_HOOK for NF_INET_PRE_ROUTING. It's finish function, br_nf_pre_routing_finish, then resets in_prerouting flag to 0 and the packet continues up the stack. The packet eventually makes it to the VRF driver and it invokes nf_hook for NF_INET_PRE_ROUTING in case any rules have been added against the vrf device. Because of the registered operations the call to nf_hook causes ip_sabotage_in to be invoked. That function sees the nf_bridge on the skb and that in_prerouting is not set. Thinking it is an invalid nested call it steals (drops) the packet. Update ip_sabotage_in to recognize that the bridge or one of its upper devices (e.g., vlan) can be enslaved to a VRF (L3 master device) and allow the packet to go through the nf_hook a second time. Fixes: 73e20b761acf ("net: vrf: Add support for PREROUTING rules on vrf device") Reported-by: D'Souza, Nelson Signed-off-by: David Ahern Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 6e0dc6bcd32a..37278dc280eb 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -835,7 +835,8 @@ static unsigned int ip_sabotage_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) { + if (skb->nf_bridge && !skb->nf_bridge->in_prerouting && + !netif_is_l3_master(skb->dev)) { state->okfn(state->net, state->sk, skb); return NF_STOLEN; } -- cgit v1.2.3 From bab4344975fe2c719eda32de59298d6de26fe126 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 17 Sep 2018 22:21:36 -0700 Subject: netfilter: nft_osf: use enum nft_data_types for nft_validate_register_store The function nft_validate_register_store requires a struct of type struct nft_data_types. NFTA_DATA_VALUE is of type enum nft_verdict_attributes. Pass the correct enum type. This fixes a warning seen with Clang: net/netfilter/nft_osf.c:52:8: warning: implicit conversion from enumeration type 'enum nft_data_attributes' to different enumeration type 'enum nft_data_types' [-Wenum-conversion] NFTA_DATA_VALUE, NFT_OSF_MAXGENRELEN); ^~~~~~~~~~~~~~~ Fixes: b96af92d6eaf ("netfilter: nf_tables: implement Passive OS fingerprint module in nft_osf") Link: https://github.com/ClangBuiltLinux/linux/issues/103 Signed-off-by: Stefan Agner Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_osf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 5af74b37f423..a35fb59ace73 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -49,7 +49,7 @@ static int nft_osf_init(const struct nft_ctx *ctx, priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]); err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFTA_DATA_VALUE, NFT_OSF_MAXGENRELEN); + NFT_DATA_VALUE, NFT_OSF_MAXGENRELEN); if (err < 0) return err; -- cgit v1.2.3 From 346fa83d10934cf206e2fd0f514bf8ce186f08fe Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Wed, 19 Sep 2018 20:21:11 +0800 Subject: netfilter: conntrack: get rid of double sizeof sizeof(sizeof()) is quite strange and does not seem to be what is wanted here. The issue is detected with the help of Coccinelle. Fixes: 39215846740a ("netfilter: conntrack: remove nlattr_size pointer from l4proto trackers") Signed-off-by: zhong jiang Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b4bdf9eda7b7..247b89784a6f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1213,8 +1213,8 @@ static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { #define TCP_NLATTR_SIZE ( \ NLA_ALIGN(NLA_HDRLEN + 1) + \ NLA_ALIGN(NLA_HDRLEN + 1) + \ - NLA_ALIGN(NLA_HDRLEN + sizeof(sizeof(struct nf_ct_tcp_flags))) + \ - NLA_ALIGN(NLA_HDRLEN + sizeof(sizeof(struct nf_ct_tcp_flags)))) + NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)) + \ + NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags))) static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) { -- cgit v1.2.3 From 5b3686c7aaade973b8806dba4ecc99bec8c988f3 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 9 Aug 2018 14:44:29 +0800 Subject: ieee802154: Use kmemdup instead of duplicating it in ca8210_test_int_driver_write Replace calls to kmalloc followed by a memcpy with a direct call to kmemdup. Signed-off-by: YueHaibing Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/ca8210.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 58299fb666ed..e21279dde85c 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -634,10 +634,9 @@ static int ca8210_test_int_driver_write( for (i = 0; i < len; i++) dev_dbg(&priv->spi->dev, "%#03x\n", buf[i]); - fifo_buffer = kmalloc(len, GFP_KERNEL); + fifo_buffer = kmemdup(buf, len, GFP_KERNEL); if (!fifo_buffer) return -ENOMEM; - memcpy(fifo_buffer, buf, len); kfifo_in(&test->up_fifo, &fifo_buffer, 4); wake_up_interruptible(&priv->test.readq); -- cgit v1.2.3 From 98e616fe7c94f9c787092b6364405d99bdf42153 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Sat, 8 Sep 2018 21:44:08 +0800 Subject: ieee802154: remove unecessary condition check before debugfs_remove_recursive debugfs_remove_recursive has taken IS_ERR_OR_NULL into account. So just remove the condition check before debugfs_remove_recursive. Signed-off-by: zhong jiang Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/adf7242.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index 23a52b9293f3..cd1d8faccca5 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -1308,8 +1308,7 @@ static int adf7242_remove(struct spi_device *spi) { struct adf7242_local *lp = spi_get_drvdata(spi); - if (!IS_ERR_OR_NULL(lp->debugfs_root)) - debugfs_remove_recursive(lp->debugfs_root); + debugfs_remove_recursive(lp->debugfs_root); cancel_delayed_work_sync(&lp->work); destroy_workqueue(lp->wqueue); -- cgit v1.2.3 From e1e5d8a9fe737d94ccc0ccbaf0c97f69a8f3e000 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 25 Sep 2018 08:32:50 +0200 Subject: net: macb: Clean 64b dma addresses if they are not detected Clear ADDR64 dma bit in DMACFG register in case that HW_DMA_CAP_64B is not detected on 64bit system. The issue was observed when bootloader(u-boot) does not check macb feature at DCFG6 register (DAW64_OFFSET) and enabling 64bit dma support by default. Then macb driver is reading DMACFG register back and only adding 64bit dma configuration but not cleaning it out. Signed-off-by: Michal Simek Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index f1a86b422617..58b9744c4058 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2160,6 +2160,7 @@ static void macb_configure_dma(struct macb *bp) else dmacfg &= ~GEM_BIT(TXCOEN); + dmacfg &= ~GEM_BIT(ADDR64); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT if (bp->hw_dma_cap & HW_DMA_CAP_64B) dmacfg |= GEM_BIT(ADDR64); -- cgit v1.2.3 From 780e83c259fc33e8959fed8dfdad17e378d72b62 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 25 Sep 2018 02:12:30 -0600 Subject: xen-netback: fix input validation in xenvif_set_hash_mapping() Both len and off are frontend specified values, so we need to make sure there's no overflow when adding the two for the bounds check. We also want to avoid undefined behavior and hence use off to index into ->hash.mapping[] only after bounds checking. This at the same time allows to take care of not applying off twice for the bounds checking against vif->num_queues. It is also insufficient to bounds check copy_op.len, as this is len truncated to 16 bits. This is XSA-270 / CVE-2018-15471. Reported-by: Felix Wilhelm Signed-off-by: Jan Beulich Reviewed-by: Paul Durrant Tested-by: Paul Durrant Cc: stable@vger.kernel.org [4.7 onwards] Signed-off-by: David S. Miller --- drivers/net/xen-netback/hash.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index 3c4c58b9fe76..3b6fb5b3bdb2 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -332,20 +332,22 @@ u32 xenvif_set_hash_mapping_size(struct xenvif *vif, u32 size) u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, u32 off) { - u32 *mapping = &vif->hash.mapping[off]; + u32 *mapping = vif->hash.mapping; struct gnttab_copy copy_op = { .source.u.ref = gref, .source.domid = vif->domid, - .dest.u.gmfn = virt_to_gfn(mapping), .dest.domid = DOMID_SELF, - .dest.offset = xen_offset_in_page(mapping), - .len = len * sizeof(u32), + .len = len * sizeof(*mapping), .flags = GNTCOPY_source_gref }; - if ((off + len > vif->hash.size) || copy_op.len > XEN_PAGE_SIZE) + if ((off + len < off) || (off + len > vif->hash.size) || + len > XEN_PAGE_SIZE / sizeof(*mapping)) return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + copy_op.dest.u.gmfn = virt_to_gfn(mapping + off); + copy_op.dest.offset = xen_offset_in_page(mapping + off); + while (len-- != 0) if (mapping[off++] >= vif->num_queues) return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; -- cgit v1.2.3 From 22f9cde3401077ea450b69bf9b0bba373e12e454 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 25 Sep 2018 02:13:01 -0600 Subject: xen-netback: validate queue numbers in xenvif_set_hash_mapping() Checking them before the grant copy means nothing as to the validity of the incoming request. As we shouldn't make the new data live before having validated it, introduce a second instance of the mapping array. Signed-off-by: Jan Beulich Reviewed-by: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 3 ++- drivers/net/xen-netback/hash.c | 20 ++++++++++++++------ drivers/net/xen-netback/interface.c | 3 ++- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index a46a1e94505d..936c0b3e0ba2 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -241,8 +241,9 @@ struct xenvif_hash_cache { struct xenvif_hash { unsigned int alg; u32 flags; + bool mapping_sel; u8 key[XEN_NETBK_MAX_HASH_KEY_SIZE]; - u32 mapping[XEN_NETBK_MAX_HASH_MAPPING_SIZE]; + u32 mapping[2][XEN_NETBK_MAX_HASH_MAPPING_SIZE]; unsigned int size; struct xenvif_hash_cache cache; }; diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index 3b6fb5b3bdb2..dc9841ea2fff 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -324,7 +324,8 @@ u32 xenvif_set_hash_mapping_size(struct xenvif *vif, u32 size) return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; vif->hash.size = size; - memset(vif->hash.mapping, 0, sizeof(u32) * size); + memset(vif->hash.mapping[vif->hash.mapping_sel], 0, + sizeof(u32) * size); return XEN_NETIF_CTRL_STATUS_SUCCESS; } @@ -332,7 +333,7 @@ u32 xenvif_set_hash_mapping_size(struct xenvif *vif, u32 size) u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, u32 off) { - u32 *mapping = vif->hash.mapping; + u32 *mapping = vif->hash.mapping[!vif->hash.mapping_sel]; struct gnttab_copy copy_op = { .source.u.ref = gref, .source.domid = vif->domid, @@ -348,9 +349,8 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, copy_op.dest.u.gmfn = virt_to_gfn(mapping + off); copy_op.dest.offset = xen_offset_in_page(mapping + off); - while (len-- != 0) - if (mapping[off++] >= vif->num_queues) - return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + memcpy(mapping, vif->hash.mapping[vif->hash.mapping_sel], + vif->hash.size * sizeof(*mapping)); if (copy_op.len != 0) { gnttab_batch_copy(©_op, 1); @@ -359,6 +359,12 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; } + while (len-- != 0) + if (mapping[off++] >= vif->num_queues) + return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; + + vif->hash.mapping_sel = !vif->hash.mapping_sel; + return XEN_NETIF_CTRL_STATUS_SUCCESS; } @@ -410,6 +416,8 @@ void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m) } if (vif->hash.size != 0) { + const u32 *mapping = vif->hash.mapping[vif->hash.mapping_sel]; + seq_puts(m, "\nHash Mapping:\n"); for (i = 0; i < vif->hash.size; ) { @@ -422,7 +430,7 @@ void xenvif_dump_hash_info(struct xenvif *vif, struct seq_file *m) seq_printf(m, "[%4u - %4u]: ", i, i + n - 1); for (j = 0; j < n; j++, i++) - seq_printf(m, "%4u ", vif->hash.mapping[i]); + seq_printf(m, "%4u ", mapping[i]); seq_puts(m, "\n"); } diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 92274c237200..f6ae23fc3f6b 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -162,7 +162,8 @@ static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, if (size == 0) return skb_get_hash_raw(skb) % dev->real_num_tx_queues; - return vif->hash.mapping[skb_get_hash_raw(skb) % size]; + return vif->hash.mapping[vif->hash.mapping_sel] + [skb_get_hash_raw(skb) % size]; } static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) -- cgit v1.2.3 From 871088bf92e11efb69bbdbd537e48c0ad4f63729 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 25 Sep 2018 02:13:37 -0600 Subject: xen-netback: handle page straddling in xenvif_set_hash_mapping() There's no guarantee that the mapping array doesn't cross a page boundary. Use a second grant copy operation if necessary. Signed-off-by: Jan Beulich Acked-by: Wei Liu Reviewed-by: Paul Durrant Signed-off-by: David S. Miller --- drivers/net/xen-netback/hash.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index dc9841ea2fff..0ccb021f1e78 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -334,28 +334,39 @@ u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len, u32 off) { u32 *mapping = vif->hash.mapping[!vif->hash.mapping_sel]; - struct gnttab_copy copy_op = { + unsigned int nr = 1; + struct gnttab_copy copy_op[2] = {{ .source.u.ref = gref, .source.domid = vif->domid, .dest.domid = DOMID_SELF, .len = len * sizeof(*mapping), .flags = GNTCOPY_source_gref - }; + }}; if ((off + len < off) || (off + len > vif->hash.size) || len > XEN_PAGE_SIZE / sizeof(*mapping)) return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; - copy_op.dest.u.gmfn = virt_to_gfn(mapping + off); - copy_op.dest.offset = xen_offset_in_page(mapping + off); + copy_op[0].dest.u.gmfn = virt_to_gfn(mapping + off); + copy_op[0].dest.offset = xen_offset_in_page(mapping + off); + if (copy_op[0].dest.offset + copy_op[0].len > XEN_PAGE_SIZE) { + copy_op[1] = copy_op[0]; + copy_op[1].source.offset = XEN_PAGE_SIZE - copy_op[0].dest.offset; + copy_op[1].dest.u.gmfn = virt_to_gfn(mapping + off + len); + copy_op[1].dest.offset = 0; + copy_op[1].len = copy_op[0].len - copy_op[1].source.offset; + copy_op[0].len = copy_op[1].source.offset; + nr = 2; + } memcpy(mapping, vif->hash.mapping[vif->hash.mapping_sel], vif->hash.size * sizeof(*mapping)); - if (copy_op.len != 0) { - gnttab_batch_copy(©_op, 1); + if (copy_op[0].len != 0) { + gnttab_batch_copy(copy_op, nr); - if (copy_op.status != GNTST_okay) + if (copy_op[0].status != GNTST_okay || + copy_op[nr - 1].status != GNTST_okay) return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER; } -- cgit v1.2.3 From 2e9361efa707e186d91b938e44f9e326725259f7 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 25 Sep 2018 10:21:55 +0100 Subject: net: hns: fix for unmapping problem when SMMU is on If SMMU is on, there is more likely that skb_shinfo(skb)->frags[i] can not send by a single BD. when this happen, the hns_nic_net_xmit_hw function map the whole data in a frags using skb_frag_dma_map, but unmap each BD' data individually when tx is done, which causes problem when SMMU is on. This patch fixes this problem by ummapping the whole data in a frags when tx is done. Signed-off-by: Yunsheng Lin Signed-off-by: Peng Li Reviewed-by: Yisen Zhuang Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hnae.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 30 +++++++++++++++++---------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c index a051e582d541..79d03f8ee7b1 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.c +++ b/drivers/net/ethernet/hisilicon/hns/hnae.c @@ -84,7 +84,7 @@ static void hnae_unmap_buffer(struct hnae_ring *ring, struct hnae_desc_cb *cb) if (cb->type == DESC_TYPE_SKB) dma_unmap_single(ring_to_dev(ring), cb->dma, cb->length, ring_to_dma_dir(ring)); - else + else if (cb->length) dma_unmap_page(ring_to_dev(ring), cb->dma, cb->length, ring_to_dma_dir(ring)); } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index f56855e63c96..5ce23d4b717e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -40,9 +40,9 @@ #define SKB_TMP_LEN(SKB) \ (((SKB)->transport_header - (SKB)->mac_header) + tcp_hdrlen(SKB)) -static void fill_v2_desc(struct hnae_ring *ring, void *priv, - int size, dma_addr_t dma, int frag_end, - int buf_num, enum hns_desc_type type, int mtu) +static void fill_v2_desc_hw(struct hnae_ring *ring, void *priv, int size, + int send_sz, dma_addr_t dma, int frag_end, + int buf_num, enum hns_desc_type type, int mtu) { struct hnae_desc *desc = &ring->desc[ring->next_to_use]; struct hnae_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; @@ -64,7 +64,7 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv, desc_cb->type = type; desc->addr = cpu_to_le64(dma); - desc->tx.send_size = cpu_to_le16((u16)size); + desc->tx.send_size = cpu_to_le16((u16)send_sz); /* config bd buffer end */ hnae_set_bit(rrcfv, HNSV2_TXD_VLD_B, 1); @@ -133,6 +133,14 @@ static void fill_v2_desc(struct hnae_ring *ring, void *priv, ring_ptr_move_fw(ring, next_to_use); } +static void fill_v2_desc(struct hnae_ring *ring, void *priv, + int size, dma_addr_t dma, int frag_end, + int buf_num, enum hns_desc_type type, int mtu) +{ + fill_v2_desc_hw(ring, priv, size, size, dma, frag_end, + buf_num, type, mtu); +} + static const struct acpi_device_id hns_enet_acpi_match[] = { { "HISI00C1", 0 }, { "HISI00C2", 0 }, @@ -289,15 +297,15 @@ static void fill_tso_desc(struct hnae_ring *ring, void *priv, /* when the frag size is bigger than hardware, split this frag */ for (k = 0; k < frag_buf_num; k++) - fill_v2_desc(ring, priv, - (k == frag_buf_num - 1) ? + fill_v2_desc_hw(ring, priv, k == 0 ? size : 0, + (k == frag_buf_num - 1) ? sizeoflast : BD_MAX_SEND_SIZE, - dma + BD_MAX_SEND_SIZE * k, - frag_end && (k == frag_buf_num - 1) ? 1 : 0, - buf_num, - (type == DESC_TYPE_SKB && !k) ? + dma + BD_MAX_SEND_SIZE * k, + frag_end && (k == frag_buf_num - 1) ? 1 : 0, + buf_num, + (type == DESC_TYPE_SKB && !k) ? DESC_TYPE_SKB : DESC_TYPE_PAGE, - mtu); + mtu); } netdev_tx_t hns_nic_net_xmit_hw(struct net_device *ndev, -- cgit v1.2.3 From 92ef12b32feab8f277b69e9fb89ede2796777f4d Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 25 Sep 2018 18:21:58 +0200 Subject: tipc: fix flow control accounting for implicit connect In the case of implicit connect message with data > 1K, the flow control accounting is incorrect. At this state, the socket does not know the peer nodes capability and falls back to legacy flow control by return 1, however the receiver of this message will perform the new block accounting. This leads to a slack and eventually traffic disturbance. In this commit, we perform tipc_node_get_capabilities() at implicit connect and perform accounting based on the peer's capability. Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3f03ddd0e35b..b6f99b021d09 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1419,8 +1419,10 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) /* Handle implicit connection setup */ if (unlikely(dest)) { rc = __tipc_sendmsg(sock, m, dlen); - if (dlen && (dlen == rc)) + if (dlen && dlen == rc) { + tsk->peer_caps = tipc_node_get_capabilities(net, dnode); tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr)); + } return rc; } -- cgit v1.2.3 From 94b6ddce71780575fbbf9d2c36afc8440e61a281 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 25 Sep 2018 21:56:57 +0200 Subject: tipc: reset bearer if device carrier not ok If we detect that under lying carrier detects errors and goes down, we reset the bearer. Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bearer.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 418f03d0be90..645c16052052 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -609,16 +609,18 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, switch (evt) { case NETDEV_CHANGE: - if (netif_carrier_ok(dev)) + if (netif_carrier_ok(dev) && netif_oper_up(dev)) { + test_and_set_bit_lock(0, &b->up); break; - /* else: fall through */ - case NETDEV_UP: - test_and_set_bit_lock(0, &b->up); - break; + } + /* fall through */ case NETDEV_GOING_DOWN: clear_bit_unlock(0, &b->up); tipc_reset_bearer(net, b); break; + case NETDEV_UP: + test_and_set_bit_lock(0, &b->up); + break; case NETDEV_CHANGEMTU: if (tipc_mtu_bad(dev, 0)) { bearer_disable(net, b); -- cgit v1.2.3 From 3f32d0be6c16b902b687453c962d17eea5b8ea19 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Tue, 25 Sep 2018 22:09:10 +0200 Subject: tipc: lock wakeup & inputq at tipc_link_reset() In tipc_link_reset() we copy the wakeup queue to input queue using skb_queue_splice_init(link->wakeupq, link->inputq). This is performed without holding any locks. The lists might be simultaneously be accessed by other cpu threads in tipc_sk_rcv(), something leading to to random missing packets. Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index b1f0bee54eac..26cc033ee167 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -841,9 +841,14 @@ void tipc_link_reset(struct tipc_link *l) l->in_session = false; l->session++; l->mtu = l->advertised_mtu; + spin_lock_bh(&l->wakeupq.lock); + spin_lock_bh(&l->inputq->lock); + skb_queue_splice_init(&l->wakeupq, l->inputq); + spin_unlock_bh(&l->inputq->lock); + spin_unlock_bh(&l->wakeupq.lock); + __skb_queue_purge(&l->transmq); __skb_queue_purge(&l->deferdq); - skb_queue_splice_init(&l->wakeupq, l->inputq); __skb_queue_purge(&l->backlogq); l->backlog[TIPC_LOW_IMPORTANCE].len = 0; l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; -- cgit v1.2.3 From 8105f9b8a8879bff7f1d43d0720c993a99c9d135 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 22 Sep 2018 18:35:31 +0200 Subject: mac80211: allocate TXQs for active monitor interfaces Monitor mode interfaces with the active flag are passed down to the driver. Drivers using TXQ expect that all interfaces have allocated TXQs before they get added. Fixes: 79af1f866193d ("mac80211: avoid allocating TXQs that won't be used") Cc: stable@vger.kernel.org Reported-by: Catrinel Catrinescu Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 5e6cf2cee965..5836ddeac9e3 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1756,7 +1756,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, if (local->ops->wake_tx_queue && type != NL80211_IFTYPE_AP_VLAN && - type != NL80211_IFTYPE_MONITOR) + (type != NL80211_IFTYPE_MONITOR || + (params->flags & MONITOR_FLAG_ACTIVE))) txq_size += sizeof(struct txq_info) + local->hw.txq_data_size; -- cgit v1.2.3 From 0bcbf6518456f63038a290bd359237d31f6f8ac3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 23 Sep 2018 11:59:13 -0700 Subject: cfg80211: fix reg_query_regdb_wmm kernel-doc Drop @ptr from kernel-doc for function reg_query_regdb_wmm(). This function parameter was recently removed so update the kernel-doc to match that and remove the kernel-doc warnings. Removes 109 occurrences of this warning message: ../include/net/cfg80211.h:4869: warning: Excess function parameter 'ptr' description in 'reg_query_regdb_wmm' Fixes: 38cb87ee47fb ("cfg80211: make wmm_rule part of the reg_rule structure") Signed-off-by: Randy Dunlap Cc: Stanislaw Gruszka Cc: Johannes Berg Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8ebabc9873d1..4de121e24ce5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4852,8 +4852,6 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator); * * @alpha2: the ISO/IEC 3166 alpha2 wmm rule to be queried. * @freq: the freqency(in MHz) to be queried. - * @ptr: pointer where the regdb wmm data is to be stored (or %NULL if - * irrelevant). This can be used later for deduplication. * @rule: pointer to store the wmm rule from the regulatory db. * * Self-managed wireless drivers can use this function to query -- cgit v1.2.3 From 30fe6d50eb088783c8729c7d930f65296b2b3fa7 Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Tue, 25 Sep 2018 11:15:00 +0900 Subject: nl80211: Fix possible Spectre-v1 for NL80211_TXRATE_HT Use array_index_nospec() to sanitize ridx with respect to speculation. Signed-off-by: Masashi Honma Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4b8ec659e797..bd26230de63e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3756,6 +3756,7 @@ static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, return false; /* check availability */ + ridx = array_index_nospec(ridx, IEEE80211_HT_MCS_MASK_LEN); if (sband->ht_cap.mcs.rx_mask[ridx] & rbit) mcs[ridx] |= rbit; else -- cgit v1.2.3 From 628980e5c8f038f730582c6ee50b7410741cd96e Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 25 Sep 2018 09:41:13 +0200 Subject: mac80211_hwsim: fix locking when iterating radios during ns exit The cleanup of radios during namespace exit has recently been reworked to directly delete a radio while temporarily releasing the spinlock, fixing a race condition between the work-queue execution and namespace exits. However, the temporary unlock allows unsafe modifications on the iterated list, resulting in a potential crash when continuing the iteration of additional radios. Move radios about to destroy to a temporary list, and clean that up after releasing the spinlock once iteration is complete. Fixes: 8cfd36a0b53a ("mac80211_hwsim: fix use-after-free bug in hwsim_exit_net") Signed-off-by: Martin Willi Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 1068757ec42e..f1150d321875 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3646,6 +3646,7 @@ static __net_init int hwsim_init_net(struct net *net) static void __net_exit hwsim_exit_net(struct net *net) { struct mac80211_hwsim_data *data, *tmp; + LIST_HEAD(list); spin_lock_bh(&hwsim_radio_lock); list_for_each_entry_safe(data, tmp, &hwsim_radios, list) { @@ -3656,17 +3657,19 @@ static void __net_exit hwsim_exit_net(struct net *net) if (data->netgroup == hwsim_net_get_netgroup(&init_net)) continue; - list_del(&data->list); + list_move(&data->list, &list); rhashtable_remove_fast(&hwsim_radios_rht, &data->rht, hwsim_rht_params); hwsim_radios_generation++; - spin_unlock_bh(&hwsim_radio_lock); + } + spin_unlock_bh(&hwsim_radio_lock); + + list_for_each_entry_safe(data, tmp, &list, list) { + list_del(&data->list); mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy), NULL); - spin_lock_bh(&hwsim_radio_lock); } - spin_unlock_bh(&hwsim_radio_lock); ida_simple_remove(&hwsim_netgroup_ida, hwsim_net_get_netgroup(net)); } -- cgit v1.2.3 From f1c47eb61d52379de5747d02bb36be20d7a2d0d3 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 25 Sep 2018 09:41:14 +0200 Subject: mac80211_hwsim: fix race in radio destruction from netlink notifier The asynchronous destruction from a work-queue of radios tagged with destroy-on-close may race with the owning namespace about to exit, resulting in potential use-after-free of that namespace. Instead of using a work-queue, move radios about to destroy to a temporary list, which can be worked on synchronously after releasing the lock. This should be safe to do from the netlink socket notifier, as the namespace is guaranteed to not get released. Signed-off-by: Martin Willi Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index f1150d321875..6fccb4d717db 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -520,7 +520,6 @@ struct mac80211_hwsim_data { int channels, idx; bool use_chanctx; bool destroy_on_close; - struct work_struct destroy_work; u32 portid; char alpha2[2]; const struct ieee80211_regdomain *regd; @@ -3565,30 +3564,27 @@ static struct genl_family hwsim_genl_family __ro_after_init = { .n_mcgrps = ARRAY_SIZE(hwsim_mcgrps), }; -static void destroy_radio(struct work_struct *work) -{ - struct mac80211_hwsim_data *data = - container_of(work, struct mac80211_hwsim_data, destroy_work); - - hwsim_radios_generation++; - mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy), NULL); -} - static void remove_user_radios(u32 portid) { struct mac80211_hwsim_data *entry, *tmp; + LIST_HEAD(list); spin_lock_bh(&hwsim_radio_lock); list_for_each_entry_safe(entry, tmp, &hwsim_radios, list) { if (entry->destroy_on_close && entry->portid == portid) { - list_del(&entry->list); + list_move(&entry->list, &list); rhashtable_remove_fast(&hwsim_radios_rht, &entry->rht, hwsim_rht_params); - INIT_WORK(&entry->destroy_work, destroy_radio); - queue_work(hwsim_wq, &entry->destroy_work); + hwsim_radios_generation++; } } spin_unlock_bh(&hwsim_radio_lock); + + list_for_each_entry_safe(entry, tmp, &list, list) { + list_del(&entry->list); + mac80211_hwsim_del_radio(entry, wiphy_name(entry->hw->wiphy), + NULL); + } } static int mac80211_hwsim_netlink_notify(struct notifier_block *nb, -- cgit v1.2.3 From 28ef8b49a338dc1844e86b7954cfffc7dfa2660a Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 25 Sep 2018 09:51:02 +0200 Subject: mac80211_hwsim: do not omit multicast announce of first added radio The allocation of hwsim radio identifiers uses a post-increment from 0, so the first radio has idx 0. This idx is explicitly excluded from multicast announcements ever since, but it is unclear why. Drop that idx check and announce the first radio as well. This makes userspace happy if it relies on these events. Signed-off-by: Martin Willi Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 6fccb4d717db..07442ada6dd0 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2934,8 +2934,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, hwsim_radios_generation++; spin_unlock_bh(&hwsim_radio_lock); - if (idx > 0) - hwsim_mcast_new_radio(idx, info, param); + hwsim_mcast_new_radio(idx, info, param); return idx; -- cgit v1.2.3 From cb28c306b93b71f2741ce1a5a66289db26715f4d Mon Sep 17 00:00:00 2001 From: Matias Karhumaa Date: Wed, 26 Sep 2018 09:13:46 +0300 Subject: Bluetooth: SMP: fix crash in unpairing In case unpair_device() was called through mgmt interface at the same time when pairing was in progress, Bluetooth kernel module crash was seen. [ 600.351225] general protection fault: 0000 [#1] SMP PTI [ 600.351235] CPU: 1 PID: 11096 Comm: btmgmt Tainted: G OE 4.19.0-rc1+ #1 [ 600.351238] Hardware name: Dell Inc. Latitude E5440/08RCYC, BIOS A18 05/14/2017 [ 600.351272] RIP: 0010:smp_chan_destroy.isra.10+0xce/0x2c0 [bluetooth] [ 600.351276] Code: c0 0f 84 b4 01 00 00 80 78 28 04 0f 84 53 01 00 00 4d 85 ed 0f 85 ab 00 00 00 48 8b 08 48 8b 50 08 be 10 00 00 00 48 89 51 08 <48> 89 0a 48 b9 00 02 00 00 00 00 ad de 48 89 48 08 48 8b 83 00 01 [ 600.351279] RSP: 0018:ffffa9be839b3b50 EFLAGS: 00010246 [ 600.351282] RAX: ffff9c999ac565a0 RBX: ffff9c9996e98c00 RCX: ffff9c999aa28b60 [ 600.351285] RDX: dead000000000200 RSI: 0000000000000010 RDI: ffff9c999e403500 [ 600.351287] RBP: ffffa9be839b3b70 R08: 0000000000000000 R09: ffffffff92a25c00 [ 600.351290] R10: ffffa9be839b3ae8 R11: 0000000000000001 R12: ffff9c995375b800 [ 600.351292] R13: 0000000000000000 R14: ffff9c99619a5000 R15: ffff9c9962a01c00 [ 600.351295] FS: 00007fb2be27c700(0000) GS:ffff9c999e880000(0000) knlGS:0000000000000000 [ 600.351298] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 600.351300] CR2: 00007fb2bdadbad0 CR3: 000000041c328001 CR4: 00000000001606e0 [ 600.351302] Call Trace: [ 600.351325] smp_failure+0x4f/0x70 [bluetooth] [ 600.351345] smp_cancel_pairing+0x74/0x80 [bluetooth] [ 600.351370] unpair_device+0x1c1/0x330 [bluetooth] [ 600.351399] hci_sock_sendmsg+0x960/0x9f0 [bluetooth] [ 600.351409] ? apparmor_socket_sendmsg+0x1e/0x20 [ 600.351417] sock_sendmsg+0x3e/0x50 [ 600.351422] sock_write_iter+0x85/0xf0 [ 600.351429] do_iter_readv_writev+0x12b/0x1b0 [ 600.351434] do_iter_write+0x87/0x1a0 [ 600.351439] vfs_writev+0x98/0x110 [ 600.351443] ? ep_poll+0x16d/0x3d0 [ 600.351447] ? ep_modify+0x73/0x170 [ 600.351451] do_writev+0x61/0xf0 [ 600.351455] ? do_writev+0x61/0xf0 [ 600.351460] __x64_sys_writev+0x1c/0x20 [ 600.351465] do_syscall_64+0x5a/0x110 [ 600.351471] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 600.351474] RIP: 0033:0x7fb2bdb62fe0 [ 600.351477] Code: 73 01 c3 48 8b 0d b8 6e 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 69 c7 2c 00 00 75 10 b8 14 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 de 80 01 00 48 89 04 24 [ 600.351479] RSP: 002b:00007ffe062cb8f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 [ 600.351484] RAX: ffffffffffffffda RBX: 000000000255b3d0 RCX: 00007fb2bdb62fe0 [ 600.351487] RDX: 0000000000000001 RSI: 00007ffe062cb920 RDI: 0000000000000004 [ 600.351490] RBP: 00007ffe062cb920 R08: 000000000255bd80 R09: 0000000000000000 [ 600.351494] R10: 0000000000000353 R11: 0000000000000246 R12: 0000000000000001 [ 600.351497] R13: 00007ffe062cbbe0 R14: 0000000000000000 R15: 0000000000000000 [ 600.351501] Modules linked in: algif_hash algif_skcipher af_alg cmac ipt_MASQUERADE nf_conntrack_netlink nfnetlink xfrm_user xfrm_algo iptable_nat nf_nat_ipv4 xt_addrtype iptable_filter ip_tables xt_conntrack x_tables nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c br_netfilter bridge stp llc overlay arc4 nls_iso8859_1 dm_crypt intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp dell_laptop kvm_intel crct10dif_pclmul dell_smm_hwmon crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper intel_cstate intel_rapl_perf uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_common videodev media hid_multitouch input_leds joydev serio_raw dell_wmi snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic dell_smbios dcdbas sparse_keymap [ 600.351569] snd_hda_intel btusb snd_hda_codec btrtl btbcm btintel snd_hda_core bluetooth(OE) snd_hwdep snd_pcm iwlmvm ecdh_generic wmi_bmof dell_wmi_descriptor snd_seq_midi mac80211 snd_seq_midi_event lpc_ich iwlwifi snd_rawmidi snd_seq snd_seq_device snd_timer cfg80211 snd soundcore mei_me mei dell_rbtn dell_smo8800 mac_hid parport_pc ppdev lp parport autofs4 hid_generic usbhid hid i915 nouveau kvmgt vfio_mdev mdev vfio_iommu_type1 vfio kvm irqbypass i2c_algo_bit ttm drm_kms_helper syscopyarea sysfillrect sysimgblt mxm_wmi psmouse ahci sdhci_pci cqhci libahci fb_sys_fops sdhci drm e1000e video wmi [ 600.351637] ---[ end trace e49e9f1df09c94fb ]--- [ 600.351664] RIP: 0010:smp_chan_destroy.isra.10+0xce/0x2c0 [bluetooth] [ 600.351666] Code: c0 0f 84 b4 01 00 00 80 78 28 04 0f 84 53 01 00 00 4d 85 ed 0f 85 ab 00 00 00 48 8b 08 48 8b 50 08 be 10 00 00 00 48 89 51 08 <48> 89 0a 48 b9 00 02 00 00 00 00 ad de 48 89 48 08 48 8b 83 00 01 [ 600.351669] RSP: 0018:ffffa9be839b3b50 EFLAGS: 00010246 [ 600.351672] RAX: ffff9c999ac565a0 RBX: ffff9c9996e98c00 RCX: ffff9c999aa28b60 [ 600.351674] RDX: dead000000000200 RSI: 0000000000000010 RDI: ffff9c999e403500 [ 600.351676] RBP: ffffa9be839b3b70 R08: 0000000000000000 R09: ffffffff92a25c00 [ 600.351679] R10: ffffa9be839b3ae8 R11: 0000000000000001 R12: ffff9c995375b800 [ 600.351681] R13: 0000000000000000 R14: ffff9c99619a5000 R15: ffff9c9962a01c00 [ 600.351684] FS: 00007fb2be27c700(0000) GS:ffff9c999e880000(0000) knlGS:0000000000000000 [ 600.351686] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 600.351689] CR2: 00007fb2bdadbad0 CR3: 000000041c328001 CR4: 00000000001606e0 Crash happened because list_del_rcu() was called twice for smp->ltk. This was possible if unpair_device was called right after ltk was generated but before keys were distributed. In this commit smp_cancel_pairing was refactored to cancel pairing if it is in progress and otherwise just removes keys. Once keys are removed from rcu list, pointers to smp context's keys are set to NULL to make sure removed list items are not accessed later. This commit also adjusts the functionality of mgmt unpair_device() little bit. Previously pairing was canceled only if pairing was in state that keys were already generated. With this commit unpair_device() cancels pairing already in earlier states. Bug was found by fuzzing kernel SMP implementation using Synopsys Defensics. Reported-by: Pekka Oikarainen Signed-off-by: Matias Karhumaa Signed-off-by: Johan Hedberg --- net/bluetooth/mgmt.c | 7 ++----- net/bluetooth/smp.c | 29 +++++++++++++++++++++++++---- net/bluetooth/smp.h | 3 ++- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3bdc8f3ca259..ccce954f8146 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2434,9 +2434,8 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, /* LE address type */ addr_type = le_addr_type(cp->addr.type); - hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type); - - err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type); + /* Abort any ongoing SMP pairing. Removes ltk and irk if they exist. */ + err = smp_cancel_and_remove_pairing(hdev, &cp->addr.bdaddr, addr_type); if (err < 0) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_NOT_PAIRED, &rp, @@ -2450,8 +2449,6 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, goto done; } - /* Abort any ongoing SMP pairing */ - smp_cancel_pairing(conn); /* Defer clearing up the connection parameters until closing to * give a chance of keeping them if a repairing happens. diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 3a7b0773536b..73f7211d0431 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2422,30 +2422,51 @@ unlock: return ret; } -void smp_cancel_pairing(struct hci_conn *hcon) +int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type) { - struct l2cap_conn *conn = hcon->l2cap_data; + struct hci_conn *hcon; + struct l2cap_conn *conn; struct l2cap_chan *chan; struct smp_chan *smp; + int err; + + err = hci_remove_ltk(hdev, bdaddr, addr_type); + hci_remove_irk(hdev, bdaddr, addr_type); + + hcon = hci_conn_hash_lookup_le(hdev, bdaddr, addr_type); + if (!hcon) + goto done; + conn = hcon->l2cap_data; if (!conn) - return; + goto done; chan = conn->smp; if (!chan) - return; + goto done; l2cap_chan_lock(chan); smp = chan->data; if (smp) { + /* Set keys to NULL to make sure smp_failure() does not try to + * remove and free already invalidated rcu list entries. */ + smp->ltk = NULL; + smp->slave_ltk = NULL; + smp->remote_irk = NULL; + if (test_bit(SMP_FLAG_COMPLETE, &smp->flags)) smp_failure(conn, 0); else smp_failure(conn, SMP_UNSPECIFIED); + err = 0; } l2cap_chan_unlock(chan); + +done: + return err; } static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb) diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 0ff6247eaa6c..121edadd5f8d 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -181,7 +181,8 @@ enum smp_key_pref { }; /* SMP Commands */ -void smp_cancel_pairing(struct hci_conn *hcon); +int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type); bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level, enum smp_key_pref key_pref); int smp_conn_security(struct hci_conn *hcon, __u8 sec_level); -- cgit v1.2.3 From 36f19d5b4f99fa9fa8263877e5f8e669d7fddc14 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 26 Sep 2018 17:35:14 -0700 Subject: net/ipv6: Remove extra call to ip6_convert_metrics for multipath case The change to move metrics from the dst to rt6_info moved the call to ip6_convert_metrics from ip6_route_add to ip6_route_info_create. In doing so it makes the call in ip6_route_info_append redundant and actually leaks the metrics installed as part of the ip6_route_info_create. Remove the now unnecessary call. Fixes: d4ead6b34b67f ("net/ipv6: move metrics from dst to rt6_info") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 826b14de7dbb..a366c05a239d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4321,11 +4321,6 @@ static int ip6_route_info_append(struct net *net, if (!nh) return -ENOMEM; nh->fib6_info = rt; - err = ip6_convert_metrics(net, rt, r_cfg); - if (err) { - kfree(nh); - return err; - } memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); list_add_tail(&nh->next, rt6_nh_list); -- cgit v1.2.3 From e0511f6c1ccdd153cf063764e93ac177a8553c5d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 23 Sep 2018 15:38:21 +0200 Subject: net: phy: fix WoL handling when suspending the PHY Actually there's nothing wrong with the two changes marked as "Fixes", they just revealed a problem which has been existing before. After having switched r8169 to phylib it was reported that WoL from shutdown doesn't work any longer (WoL from suspend isn't affected). Reason is that during shutdown phy_disconnect()->phy_detach()-> phy_suspend() is called. A similar issue occurs when the phylib state machine calls phy_suspend() when handling state PHY_HALTED. Core of the problem is that phy_suspend() suspends the PHY when it should not due to WoL. phy_suspend() checks for WoL already, but this works only if the PHY driver handles WoL (what is rarely the case). Typically WoL is handled by the MAC driver. phylib knows about this and handles it in mdio_bus_phy_may_suspend(), but that's used only when suspending the system, not in other cases like shutdown. Therefore factor out the relevant check from mdio_bus_phy_may_suspend() to a new function phy_may_suspend() and use it in phy_suspend(). Last but not least change phy_detach() to call phy_suspend() before attached_dev is set to NULL. phy_suspend() accesses attached_dev when checking whether the MAC driver activated WoL. Fixes: f1e911d5d0df ("r8169: add basic phylib support") Fixes: e8cfd9d6c772 ("net: phy: call state machine synchronously in phy_stop") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index db1172db1e7c..2c80d3c44cd6 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -75,6 +75,26 @@ extern struct phy_driver genphy_10g_driver; static LIST_HEAD(phy_fixup_list); static DEFINE_MUTEX(phy_fixup_lock); +static bool phy_may_suspend(struct phy_device *phydev) +{ + struct net_device *netdev = phydev->attached_dev; + + if (!netdev) + return true; + + /* Don't suspend PHY if the attached netdev parent may wakeup. + * The parent may point to a PCI device, as in tg3 driver. + */ + if (netdev->dev.parent && device_may_wakeup(netdev->dev.parent)) + return false; + + /* Also don't suspend PHY if the netdev itself may wakeup. This + * is the case for devices w/o underlaying pwr. mgmt. aware bus, + * e.g. SoC devices. + */ + return !device_may_wakeup(&netdev->dev); +} + #ifdef CONFIG_PM static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { @@ -93,20 +113,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) if (!netdev) return !phydev->suspended; - /* Don't suspend PHY if the attached netdev parent may wakeup. - * The parent may point to a PCI device, as in tg3 driver. - */ - if (netdev->dev.parent && device_may_wakeup(netdev->dev.parent)) - return false; - - /* Also don't suspend PHY if the netdev itself may wakeup. This - * is the case for devices w/o underlaying pwr. mgmt. aware bus, - * e.g. SoC devices. - */ - if (device_may_wakeup(&netdev->dev)) - return false; - - return true; + return phy_may_suspend(phydev); } static int mdio_bus_phy_suspend(struct device *dev) @@ -1132,9 +1139,9 @@ void phy_detach(struct phy_device *phydev) sysfs_remove_link(&dev->dev.kobj, "phydev"); sysfs_remove_link(&phydev->mdio.dev.kobj, "attached_dev"); } + phy_suspend(phydev); phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; - phy_suspend(phydev); phydev->phylink = NULL; phy_led_triggers_unregister(phydev); @@ -1171,9 +1178,12 @@ int phy_suspend(struct phy_device *phydev) struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; int ret = 0; + if (phydev->suspended) + return 0; + /* If the device has WOL enabled, we cannot suspend the PHY */ phy_ethtool_get_wol(phydev, &wol); - if (wol.wolopts) + if (wol.wolopts || !phy_may_suspend(phydev)) return -EBUSY; if (phydev->drv && phydrv->suspend) -- cgit v1.2.3 From d31d1d03aa909aa6257d9d581eb0eb5d0ed366e2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 26 Sep 2018 20:01:36 -0700 Subject: Revert "net: phy: fix WoL handling when suspending the PHY" This reverts commit e0511f6c1ccdd153cf063764e93ac177a8553c5d. I commited the wrong version of these changes. Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 42 ++++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 2c80d3c44cd6..db1172db1e7c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -75,26 +75,6 @@ extern struct phy_driver genphy_10g_driver; static LIST_HEAD(phy_fixup_list); static DEFINE_MUTEX(phy_fixup_lock); -static bool phy_may_suspend(struct phy_device *phydev) -{ - struct net_device *netdev = phydev->attached_dev; - - if (!netdev) - return true; - - /* Don't suspend PHY if the attached netdev parent may wakeup. - * The parent may point to a PCI device, as in tg3 driver. - */ - if (netdev->dev.parent && device_may_wakeup(netdev->dev.parent)) - return false; - - /* Also don't suspend PHY if the netdev itself may wakeup. This - * is the case for devices w/o underlaying pwr. mgmt. aware bus, - * e.g. SoC devices. - */ - return !device_may_wakeup(&netdev->dev); -} - #ifdef CONFIG_PM static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { @@ -113,7 +93,20 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) if (!netdev) return !phydev->suspended; - return phy_may_suspend(phydev); + /* Don't suspend PHY if the attached netdev parent may wakeup. + * The parent may point to a PCI device, as in tg3 driver. + */ + if (netdev->dev.parent && device_may_wakeup(netdev->dev.parent)) + return false; + + /* Also don't suspend PHY if the netdev itself may wakeup. This + * is the case for devices w/o underlaying pwr. mgmt. aware bus, + * e.g. SoC devices. + */ + if (device_may_wakeup(&netdev->dev)) + return false; + + return true; } static int mdio_bus_phy_suspend(struct device *dev) @@ -1139,9 +1132,9 @@ void phy_detach(struct phy_device *phydev) sysfs_remove_link(&dev->dev.kobj, "phydev"); sysfs_remove_link(&phydev->mdio.dev.kobj, "attached_dev"); } - phy_suspend(phydev); phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; + phy_suspend(phydev); phydev->phylink = NULL; phy_led_triggers_unregister(phydev); @@ -1178,12 +1171,9 @@ int phy_suspend(struct phy_device *phydev) struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; int ret = 0; - if (phydev->suspended) - return 0; - /* If the device has WOL enabled, we cannot suspend the PHY */ phy_ethtool_get_wol(phydev, &wol); - if (wol.wolopts || !phy_may_suspend(phydev)) + if (wol.wolopts) return -EBUSY; if (phydev->drv && phydrv->suspend) -- cgit v1.2.3 From 6194114324139dc16f3251c67ed853bd6d4ae056 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 24 Sep 2018 21:58:59 +0200 Subject: net: core: add member wol_enabled to struct net_device Add flag wol_enabled to struct net_device indicating whether Wake-on-LAN is enabled. As first user phy_suspend() will use it to decide whether PHY can be suspended or not. Fixes: f1e911d5d0df ("r8169: add basic phylib support") Fixes: e8cfd9d6c772 ("net: phy: call state machine synchronously in phy_stop") Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ net/core/ethtool.c | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ca5ab98053c8..c7861e4b402c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1730,6 +1730,8 @@ enum netdev_priv_flags { * switch driver and used to set the phys state of the * switch port. * + * @wol_enabled: Wake-on-LAN is enabled + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2014,6 +2016,7 @@ struct net_device { struct lock_class_key *qdisc_tx_busylock; struct lock_class_key *qdisc_running_key; bool proto_down; + unsigned wol_enabled:1; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 234a0ec2e932..0762aaf8e964 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1483,6 +1483,7 @@ static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) { struct ethtool_wolinfo wol; + int ret; if (!dev->ethtool_ops->set_wol) return -EOPNOTSUPP; @@ -1490,7 +1491,13 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) if (copy_from_user(&wol, useraddr, sizeof(wol))) return -EFAULT; - return dev->ethtool_ops->set_wol(dev, &wol); + ret = dev->ethtool_ops->set_wol(dev, &wol); + if (ret) + return ret; + + dev->wol_enabled = !!wol.wolopts; + + return 0; } static int ethtool_get_eee(struct net_device *dev, char __user *useraddr) -- cgit v1.2.3 From 93f41e67dc8ff0fd987120a6ef2717f21462c534 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 24 Sep 2018 22:01:32 +0200 Subject: net: phy: fix WoL handling when suspending the PHY Core of the problem is that phy_suspend() suspends the PHY when it should not because of WoL. phy_suspend() checks for WoL already, but this works only if the PHY driver handles WoL (what is rarely the case). Typically WoL is handled by the MAC driver. This patch uses new member wol_enabled of struct net_device as additional criteria in the check when not to suspend the PHY because of WoL. Last but not least change phy_detach() to call phy_suspend() before attached_dev is set to NULL. phy_suspend() accesses attached_dev when checking whether the MAC driver activated WoL. Fixes: f1e911d5d0df ("r8169: add basic phylib support") Fixes: e8cfd9d6c772 ("net: phy: call state machine synchronously in phy_stop") Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index db1172db1e7c..19ab8a7d1e48 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -93,7 +93,12 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) if (!netdev) return !phydev->suspended; - /* Don't suspend PHY if the attached netdev parent may wakeup. + if (netdev->wol_enabled) + return false; + + /* As long as not all affected network drivers support the + * wol_enabled flag, let's check for hints that WoL is enabled. + * Don't suspend PHY if the attached netdev parent may wake up. * The parent may point to a PCI device, as in tg3 driver. */ if (netdev->dev.parent && device_may_wakeup(netdev->dev.parent)) @@ -1132,9 +1137,9 @@ void phy_detach(struct phy_device *phydev) sysfs_remove_link(&dev->dev.kobj, "phydev"); sysfs_remove_link(&phydev->mdio.dev.kobj, "attached_dev"); } + phy_suspend(phydev); phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; - phy_suspend(phydev); phydev->phylink = NULL; phy_led_triggers_unregister(phydev); @@ -1168,12 +1173,13 @@ EXPORT_SYMBOL(phy_detach); int phy_suspend(struct phy_device *phydev) { struct phy_driver *phydrv = to_phy_driver(phydev->mdio.dev.driver); + struct net_device *netdev = phydev->attached_dev; struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; int ret = 0; /* If the device has WOL enabled, we cannot suspend the PHY */ phy_ethtool_get_wol(phydev, &wol); - if (wol.wolopts) + if (wol.wolopts || (netdev && netdev->wol_enabled)) return -EBUSY; if (phydev->drv && phydrv->suspend) -- cgit v1.2.3 From 5a94df70d3878ae597dd8331ec0add491bdfe851 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Sep 2018 16:51:41 +0100 Subject: qed: fix spelling mistake "toogle" -> "toggle" Trivial fix to spelling mistake in DP_VERBOSE message Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index be941cfaa2d4..c71391b9c757 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -228,7 +228,7 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, num_cons, "Toggle"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, - "Failed to allocate toogle bits, rc = %d\n", rc); + "Failed to allocate toggle bits, rc = %d\n", rc); goto free_cq_map; } -- cgit v1.2.3 From 079db3fd4ef164a59006425a988b1c73639fade8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Sep 2018 18:39:51 +0100 Subject: wimax/i2400m: fix spelling mistake "not unitialized" -> "uninitialized" Trivial fix to spelling mistake in ms_to_errno array of error messages and remove confusing "not" from the error text since the error code refers to an uninitialized error code. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/wimax/i2400m/control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wimax/i2400m/control.c b/drivers/net/wimax/i2400m/control.c index 094cea775d0c..ef298d8525c5 100644 --- a/drivers/net/wimax/i2400m/control.c +++ b/drivers/net/wimax/i2400m/control.c @@ -257,7 +257,7 @@ static const struct [I2400M_MS_ACCESSIBILITY_ERROR] = { "accesibility error", -EIO }, [I2400M_MS_BUSY] = { "busy", -EBUSY }, [I2400M_MS_CORRUPTED_TLV] = { "corrupted TLV", -EILSEQ }, - [I2400M_MS_UNINITIALIZED] = { "not unitialized", -EILSEQ }, + [I2400M_MS_UNINITIALIZED] = { "uninitialized", -EILSEQ }, [I2400M_MS_UNKNOWN_ERROR] = { "unknown error", -EIO }, [I2400M_MS_PRODUCTION_ERROR] = { "production error", -EIO }, [I2400M_MS_NO_RF] = { "no RF", -EIO }, -- cgit v1.2.3 From a898fba32229efd5e6b6154f83fa86a7145156b9 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 13:53:34 -0700 Subject: qed: Avoid implicit enum conversion in qed_set_tunn_cls_info Clang warns when one enumerated type is implicitly converted to another. drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:163:25: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->vxlan.tun_cls = type; ~ ^~~~ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:165:26: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->l2_gre.tun_cls = type; ~ ^~~~ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:167:26: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->ip_gre.tun_cls = type; ~ ^~~~ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:169:29: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->l2_geneve.tun_cls = type; ~ ^~~~ drivers/net/ethernet/qlogic/qed/qed_sp_commands.c:171:29: warning: implicit conversion from enumeration type 'enum tunnel_clss' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] p_tun->ip_geneve.tun_cls = type; ~ ^~~~ 5 warnings generated. Avoid this by changing type to an int. Link: https://github.com/ClangBuiltLinux/linux/issues/125 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_sp_commands.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 8de644b4721e..77b6248ad3b9 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -154,7 +154,7 @@ qed_set_pf_update_tunn_mode(struct qed_tunnel_info *p_tun, static void qed_set_tunn_cls_info(struct qed_tunnel_info *p_tun, struct qed_tunnel_info *p_src) { - enum tunnel_clss type; + int type; p_tun->b_update_rx_cls = p_src->b_update_rx_cls; p_tun->b_update_tx_cls = p_src->b_update_tx_cls; -- cgit v1.2.3 From db803f36e56f23b5a2266807e190d1dc11554d54 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 14:05:27 -0700 Subject: qed: Fix mask parameter in qed_vf_prep_tunn_req_tlv Clang complains when one enumerated type is implicitly converted to another. drivers/net/ethernet/qlogic/qed/qed_vf.c:686:6: warning: implicit conversion from enumeration type 'enum qed_tunn_mode' to different enumeration type 'enum qed_tunn_clss' [-Wenum-conversion] QED_MODE_L2GENEVE_TUNN, ^~~~~~~~~~~~~~~~~~~~~~ Update mask's parameter to expect qed_tunn_mode, which is what was intended. Link: https://github.com/ClangBuiltLinux/linux/issues/125 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_vf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 3d4269659820..fcd8da08274f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -572,7 +572,7 @@ free_p_iov: static void __qed_vf_prep_tunn_req_tlv(struct vfpf_update_tunn_param_tlv *p_req, struct qed_tunn_update_type *p_src, - enum qed_tunn_clss mask, u8 *p_cls) + enum qed_tunn_mode mask, u8 *p_cls) { if (p_src->b_update_mode) { p_req->tun_mode_update_mask |= BIT(mask); @@ -587,7 +587,7 @@ __qed_vf_prep_tunn_req_tlv(struct vfpf_update_tunn_param_tlv *p_req, static void qed_vf_prep_tunn_req_tlv(struct vfpf_update_tunn_param_tlv *p_req, struct qed_tunn_update_type *p_src, - enum qed_tunn_clss mask, + enum qed_tunn_mode mask, u8 *p_cls, struct qed_tunn_update_udp_port *p_port, u8 *p_update_port, u16 *p_udp_port) { -- cgit v1.2.3 From d3a315795b4ce8b105a64a90699103121bde04a8 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 14:34:53 -0700 Subject: qed: Avoid implicit enum conversion in qed_roce_mode_to_flavor Clang warns when one enumerated type is implicitly converted to another. drivers/net/ethernet/qlogic/qed/qed_roce.c:153:12: warning: implicit conversion from enumeration type 'enum roce_mode' to different enumeration type 'enum roce_flavor' [-Wenum-conversion] flavor = ROCE_V2_IPV6; ~ ^~~~~~~~~~~~ drivers/net/ethernet/qlogic/qed/qed_roce.c:156:12: warning: implicit conversion from enumeration type 'enum roce_mode' to different enumeration type 'enum roce_flavor' [-Wenum-conversion] flavor = MAX_ROCE_MODE; ~ ^~~~~~~~~~~~~ 2 warnings generated. Use the appropriate values from the expected type, roce_flavor: ROCE_V2_IPV6 = RROCE_IPV6 = 2 MAX_ROCE_MODE = MAX_ROCE_FLAVOR = 3 While we're add it, ditch the local variable flavor, we can just return the value directly from the switch statement. Link: https://github.com/ClangBuiltLinux/linux/issues/125 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_roce.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 7d7a64c55ff1..f9167d1354bb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -140,23 +140,16 @@ static void qed_rdma_copy_gids(struct qed_rdma_qp *qp, __le32 *src_gid, static enum roce_flavor qed_roce_mode_to_flavor(enum roce_mode roce_mode) { - enum roce_flavor flavor; - switch (roce_mode) { case ROCE_V1: - flavor = PLAIN_ROCE; - break; + return PLAIN_ROCE; case ROCE_V2_IPV4: - flavor = RROCE_IPV4; - break; + return RROCE_IPV4; case ROCE_V2_IPV6: - flavor = ROCE_V2_IPV6; - break; + return RROCE_IPV6; default: - flavor = MAX_ROCE_MODE; - break; + return MAX_ROCE_FLAVOR; } - return flavor; } static void qed_roce_free_cid_pair(struct qed_hwfn *p_hwfn, u16 cid) -- cgit v1.2.3 From 6a9e461f6fe4434e6172304b69774daff9a3ac4c Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 24 Sep 2018 14:39:42 -0700 Subject: bonding: pass link-local packets to bonding master also. Commit b89f04c61efe ("bonding: deliver link-local packets with skb->dev set to link that packets arrived on") changed the behavior of how link-local-multicast packets are processed. The change in the behavior broke some legacy use cases where these packets are expected to arrive on bonding master device also. This patch passes the packet to the stack with the link it arrived on as well as passes to the bonding-master device to preserve the legacy use case. Fixes: b89f04c61efe ("bonding: deliver link-local packets with skb->dev set to link that packets arrived on") Reported-by: Michal Soltys Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 0d87e11e7f1d..8c0a0908875d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1170,9 +1170,26 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) } } - /* don't change skb->dev for link-local packets */ - if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) + /* Link-local multicast packets should be passed to the + * stack on the link they arrive as well as pass them to the + * bond-master device. These packets are mostly usable when + * stack receives it with the link on which they arrive + * (e.g. LLDP) they also must be available on master. Some of + * the use cases include (but are not limited to): LLDP agents + * that must be able to operate both on enslaved interfaces as + * well as on bonds themselves; linux bridges that must be able + * to process/pass BPDUs from attached bonds when any kind of + * STP version is enabled on the network. + */ + if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) { + struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + + if (nskb) { + nskb->dev = bond->dev; + netif_rx(nskb); + } return RX_HANDLER_PASS; + } if (bond_should_deliver_exact_match(skb, slave, bond)) return RX_HANDLER_EXACT; -- cgit v1.2.3 From d4859d749aa7090ffb743d15648adb962a1baeae Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 24 Sep 2018 14:40:11 -0700 Subject: bonding: avoid possible dead-lock Syzkaller reported this on a slightly older kernel but it's still applicable to the current kernel - ====================================================== WARNING: possible circular locking dependency detected 4.18.0-next-20180823+ #46 Not tainted ------------------------------------------------------ syz-executor4/26841 is trying to acquire lock: 00000000dd41ef48 ((wq_completion)bond_dev->name){+.+.}, at: flush_workqueue+0x2db/0x1e10 kernel/workqueue.c:2652 but task is already holding lock: 00000000768ab431 (rtnl_mutex){+.+.}, at: rtnl_lock net/core/rtnetlink.c:77 [inline] 00000000768ab431 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x412/0xc30 net/core/rtnetlink.c:4708 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (rtnl_mutex){+.+.}: __mutex_lock_common kernel/locking/mutex.c:925 [inline] __mutex_lock+0x171/0x1700 kernel/locking/mutex.c:1073 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:1088 rtnl_lock+0x17/0x20 net/core/rtnetlink.c:77 bond_netdev_notify drivers/net/bonding/bond_main.c:1310 [inline] bond_netdev_notify_work+0x44/0xd0 drivers/net/bonding/bond_main.c:1320 process_one_work+0xc73/0x1aa0 kernel/workqueue.c:2153 worker_thread+0x189/0x13c0 kernel/workqueue.c:2296 kthread+0x35a/0x420 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415 -> #1 ((work_completion)(&(&nnw->work)->work)){+.+.}: process_one_work+0xc0b/0x1aa0 kernel/workqueue.c:2129 worker_thread+0x189/0x13c0 kernel/workqueue.c:2296 kthread+0x35a/0x420 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:415 -> #0 ((wq_completion)bond_dev->name){+.+.}: lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901 flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655 drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820 destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155 __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138 bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734 register_netdevice+0x337/0x1100 net/core/dev.c:8410 bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453 rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099 rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:632 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115 __sys_sendmsg+0x11d/0x290 net/socket.c:2153 __do_sys_sendmsg net/socket.c:2162 [inline] __se_sys_sendmsg net/socket.c:2160 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe other info that might help us debug this: Chain exists of: (wq_completion)bond_dev->name --> (work_completion)(&(&nnw->work)->work) --> rtnl_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(rtnl_mutex); lock((work_completion)(&(&nnw->work)->work)); lock(rtnl_mutex); lock((wq_completion)bond_dev->name); *** DEADLOCK *** 1 lock held by syz-executor4/26841: stack backtrace: CPU: 1 PID: 26841 Comm: syz-executor4 Not tainted 4.18.0-next-20180823+ #46 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 print_circular_bug.isra.34.cold.55+0x1bd/0x27d kernel/locking/lockdep.c:1222 check_prev_add kernel/locking/lockdep.c:1862 [inline] check_prevs_add kernel/locking/lockdep.c:1975 [inline] validate_chain kernel/locking/lockdep.c:2416 [inline] __lock_acquire+0x3449/0x5020 kernel/locking/lockdep.c:3412 lock_acquire+0x1e4/0x4f0 kernel/locking/lockdep.c:3901 flush_workqueue+0x30a/0x1e10 kernel/workqueue.c:2655 drain_workqueue+0x2a9/0x640 kernel/workqueue.c:2820 destroy_workqueue+0xc6/0x9d0 kernel/workqueue.c:4155 __alloc_workqueue_key+0xef9/0x1190 kernel/workqueue.c:4138 bond_init+0x269/0x940 drivers/net/bonding/bond_main.c:4734 register_netdevice+0x337/0x1100 net/core/dev.c:8410 bond_newlink+0x49/0xa0 drivers/net/bonding/bond_netlink.c:453 rtnl_newlink+0xef4/0x1d50 net/core/rtnetlink.c:3099 rtnetlink_rcv_msg+0x46e/0xc30 net/core/rtnetlink.c:4711 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4729 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343 netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:632 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2115 __sys_sendmsg+0x11d/0x290 net/socket.c:2153 __do_sys_sendmsg net/socket.c:2162 [inline] __se_sys_sendmsg net/socket.c:2160 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2160 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457089 Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f2df20a5c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f2df20a66d4 RCX: 0000000000457089 RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003 RBP: 0000000000930140 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 00000000004d40b8 R14: 00000000004c8ad8 R15: 0000000000000001 Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 43 ++++++++++++++++------------------------- include/net/bonding.h | 7 +------ 2 files changed, 18 insertions(+), 32 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8c0a0908875d..c05c01a00755 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -210,6 +210,7 @@ static void bond_get_stats(struct net_device *bond_dev, static void bond_slave_arr_handler(struct work_struct *work); static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, int mod); +static void bond_netdev_notify_work(struct work_struct *work); /*---------------------------- General routines -----------------------------*/ @@ -1286,6 +1287,8 @@ static struct slave *bond_alloc_slave(struct bonding *bond) return NULL; } } + INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work); + return slave; } @@ -1293,6 +1296,7 @@ static void bond_free_slave(struct slave *slave) { struct bonding *bond = bond_get_bond_by_slave(slave); + cancel_delayed_work_sync(&slave->notify_work); if (BOND_MODE(bond) == BOND_MODE_8023AD) kfree(SLAVE_AD_INFO(slave)); @@ -1314,39 +1318,26 @@ static void bond_fill_ifslave(struct slave *slave, struct ifslave *info) info->link_failure_count = slave->link_failure_count; } -static void bond_netdev_notify(struct net_device *dev, - struct netdev_bonding_info *info) -{ - rtnl_lock(); - netdev_bonding_info_change(dev, info); - rtnl_unlock(); -} - static void bond_netdev_notify_work(struct work_struct *_work) { - struct netdev_notify_work *w = - container_of(_work, struct netdev_notify_work, work.work); + struct slave *slave = container_of(_work, struct slave, + notify_work.work); + + if (rtnl_trylock()) { + struct netdev_bonding_info binfo; - bond_netdev_notify(w->dev, &w->bonding_info); - dev_put(w->dev); - kfree(w); + bond_fill_ifslave(slave, &binfo.slave); + bond_fill_ifbond(slave->bond, &binfo.master); + netdev_bonding_info_change(slave->dev, &binfo); + rtnl_unlock(); + } else { + queue_delayed_work(slave->bond->wq, &slave->notify_work, 1); + } } void bond_queue_slave_event(struct slave *slave) { - struct bonding *bond = slave->bond; - struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC); - - if (!nnw) - return; - - dev_hold(slave->dev); - nnw->dev = slave->dev; - bond_fill_ifslave(slave, &nnw->bonding_info.slave); - bond_fill_ifbond(bond, &nnw->bonding_info.master); - INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work); - - queue_delayed_work(slave->bond->wq, &nnw->work, 0); + queue_delayed_work(slave->bond->wq, &slave->notify_work, 0); } void bond_lower_state_changed(struct slave *slave) diff --git a/include/net/bonding.h b/include/net/bonding.h index a2d058170ea3..b46d68acf701 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -139,12 +139,6 @@ struct bond_parm_tbl { int mode; }; -struct netdev_notify_work { - struct delayed_work work; - struct net_device *dev; - struct netdev_bonding_info bonding_info; -}; - struct slave { struct net_device *dev; /* first - useful for panic debug */ struct bonding *bond; /* our master */ @@ -172,6 +166,7 @@ struct slave { #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif + struct delayed_work notify_work; struct kobject kobj; struct rtnl_link_stats64 slave_stats; }; -- cgit v1.2.3 From 1c492a9d55ba99079210ed901dd8a5423f980487 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 15:17:03 -0700 Subject: qed: Avoid constant logical operation warning in qed_vf_pf_acquire Clang warns when a constant is used in a boolean context as it thinks a bitwise operation may have been intended. drivers/net/ethernet/qlogic/qed/qed_vf.c:415:27: warning: use of logical '&&' with constant operand [-Wconstant-logical-operand] if (!p_iov->b_pre_fp_hsi && ^ drivers/net/ethernet/qlogic/qed/qed_vf.c:415:27: note: use '&' for a bitwise operation if (!p_iov->b_pre_fp_hsi && ^~ & drivers/net/ethernet/qlogic/qed/qed_vf.c:415:27: note: remove constant to silence this warning if (!p_iov->b_pre_fp_hsi && ~^~ 1 warning generated. This has been here since commit 1fe614d10f45 ("qed: Relax VF firmware requirements") and I am not entirely sure why since 0 isn't a special case. Just remove the statement causing Clang to warn since it isn't required. Link: https://github.com/ClangBuiltLinux/linux/issues/126 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_vf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index fcd8da08274f..be118d057b92 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -413,7 +413,6 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) } if (!p_iov->b_pre_fp_hsi && - ETH_HSI_VER_MINOR && (resp->pfdev_info.minor_fp_hsi < ETH_HSI_VER_MINOR)) { DP_INFO(p_hwfn, "PF is using older fastpath HSI; %02x.%02x is configured\n", -- cgit v1.2.3 From 77f2d753819b7d50c16abfb778caf1fe075faed0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 24 Sep 2018 14:42:12 -0700 Subject: qed: Avoid implicit enum conversion in qed_iwarp_parse_rx_pkt Clang warns when one enumerated type is implicitly converted to another. drivers/net/ethernet/qlogic/qed/qed_iwarp.c:1713:25: warning: implicit conversion from enumeration type 'enum tcp_ip_version' to different enumeration type 'enum qed_tcp_ip_version' [-Wenum-conversion] cm_info->ip_version = TCP_IPV4; ~ ^~~~~~~~ drivers/net/ethernet/qlogic/qed/qed_iwarp.c:1733:25: warning: implicit conversion from enumeration type 'enum tcp_ip_version' to different enumeration type 'enum qed_tcp_ip_version' [-Wenum-conversion] cm_info->ip_version = TCP_IPV6; ~ ^~~~~~~~ 2 warnings generated. Use the appropriate values from the expected type, qed_tcp_ip_version: TCP_IPV4 = QED_TCP_IPV4 = 0 TCP_IPV6 = QED_TCP_IPV6 = 1 Link: https://github.com/ClangBuiltLinux/linux/issues/125 Signed-off-by: Nathan Chancellor Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 17f3dfa2cc94..e860bdf0f752 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1710,7 +1710,7 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, cm_info->local_ip[0] = ntohl(iph->daddr); cm_info->remote_ip[0] = ntohl(iph->saddr); - cm_info->ip_version = TCP_IPV4; + cm_info->ip_version = QED_TCP_IPV4; ip_hlen = (iph->ihl) * sizeof(u32); *payload_len = ntohs(iph->tot_len) - ip_hlen; @@ -1730,7 +1730,7 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn, cm_info->remote_ip[i] = ntohl(ip6h->saddr.in6_u.u6_addr32[i]); } - cm_info->ip_version = TCP_IPV6; + cm_info->ip_version = QED_TCP_IPV6; ip_hlen = sizeof(*ip6h); *payload_len = ntohs(ip6h->payload_len); -- cgit v1.2.3 From 3e322474485931e7ea6e4c5560089991a7f03cbc Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 25 Sep 2018 01:50:00 +0200 Subject: net: phy: sfp: Fix unregistering of HWMON SFP device A HWMON device is only registered is the SFP module supports the diagnostic page and is complient to SFF8472. Don't unconditionally unregister the hwmon device when the SFP module is remove, otherwise we access data structures which don't exist. Reported-by: Florian Fainelli Fixes: 1323061a018a ("net: phy: sfp: Add HWMON support for module sensors") Signed-off-by: Andrew Lunn Tested-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 52fffb98fde9..6e13b8832bc7 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -1098,8 +1098,11 @@ static int sfp_hwmon_insert(struct sfp *sfp) static void sfp_hwmon_remove(struct sfp *sfp) { - hwmon_device_unregister(sfp->hwmon_dev); - kfree(sfp->hwmon_name); + if (!IS_ERR_OR_NULL(sfp->hwmon_dev)) { + hwmon_device_unregister(sfp->hwmon_dev); + sfp->hwmon_dev = NULL; + kfree(sfp->hwmon_name); + } } #else static int sfp_hwmon_insert(struct sfp *sfp) -- cgit v1.2.3 From 8fd780698745ba121530c5c20fd237aacde4c371 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 26 Sep 2018 10:35:42 +0800 Subject: vxlan: fill ttl inherit info When add vxlan ttl inherit support, I forgot to fill it when dump vlxan info. Fix it now. Fixes: 72f6d71e491e6 ("vxlan: add ttl inherit support") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index ababba37d735..2b8da2b7e721 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3539,6 +3539,7 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LINK */ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL_INHERIT */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */ nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */ @@ -3603,6 +3604,8 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) } if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) || + nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT, + !!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) || nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) || nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) || nla_put_u8(skb, IFLA_VXLAN_LEARNING, -- cgit v1.2.3 From 73f21c653f930f438d53eed29b5e4c65c8a0f906 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 26 Sep 2018 00:41:04 -0400 Subject: bnxt_en: Fix TX timeout during netpoll. The current netpoll implementation in the bnxt_en driver has problems that may miss TX completion events. bnxt_poll_work() in effect is only handling at most 1 TX packet before exiting. In addition, there may be in flight TX completions that ->poll() may miss even after we fix bnxt_poll_work() to handle all visible TX completions. netpoll may not call ->poll() again and HW may not generate IRQ because the driver does not ARM the IRQ when the budget (0 for netpoll) is reached. We fix it by handling all TX completions and to always ARM the IRQ when we exit ->poll() with 0 budget. Also, the logic to ACK the completion ring in case it is almost filled with TX completions need to be adjusted to take care of the 0 budget case, as discussed with Eric Dumazet Reported-by: Song Liu Reviewed-by: Song Liu Tested-by: Song Liu Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 61957b0bbd8c..0478e562abac 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1884,8 +1884,11 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) if (TX_CMP_TYPE(txcmp) == CMP_TYPE_TX_L2_CMP) { tx_pkts++; /* return full budget so NAPI will complete. */ - if (unlikely(tx_pkts > bp->tx_wake_thresh)) + if (unlikely(tx_pkts > bp->tx_wake_thresh)) { rx_pkts = budget; + raw_cons = NEXT_RAW_CMP(raw_cons); + break; + } } else if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) { if (likely(budget)) rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event); @@ -1913,7 +1916,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) } raw_cons = NEXT_RAW_CMP(raw_cons); - if (rx_pkts == budget) + if (rx_pkts && rx_pkts == budget) break; } @@ -2027,8 +2030,12 @@ static int bnxt_poll(struct napi_struct *napi, int budget) while (1) { work_done += bnxt_poll_work(bp, bnapi, budget - work_done); - if (work_done >= budget) + if (work_done >= budget) { + if (!budget) + BNXT_CP_DB_REARM(cpr->cp_doorbell, + cpr->cp_raw_cons); break; + } if (!bnxt_has_work(bp, cpr)) { if (napi_complete_done(napi, work_done)) -- cgit v1.2.3 From d4ce58082f206bf6e7d697380c7bc5480a8b0264 Mon Sep 17 00:00:00 2001 From: Maciej Żenczykowski Date: Tue, 25 Sep 2018 21:59:28 -0700 Subject: net-tcp: /proc/sys/net/ipv4/tcp_probe_interval is a u32 not int MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (fix documentation and sysctl access to treat it as such) Tested: # zcat /proc/config.gz | egrep ^CONFIG_HZ CONFIG_HZ_1000=y CONFIG_HZ=1000 # echo $[(1<<32)/1000 + 1] | tee /proc/sys/net/ipv4/tcp_probe_interval 4294968 tee: /proc/sys/net/ipv4/tcp_probe_interval: Invalid argument # echo $[(1<<32)/1000] | tee /proc/sys/net/ipv4/tcp_probe_interval 4294967 # echo 0 | tee /proc/sys/net/ipv4/tcp_probe_interval # echo -1 | tee /proc/sys/net/ipv4/tcp_probe_interval -1 tee: /proc/sys/net/ipv4/tcp_probe_interval: Invalid argument Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 2 +- net/ipv4/sysctl_net_ipv4.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 8313a636dd53..960de8fe3f40 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -425,7 +425,7 @@ tcp_mtu_probing - INTEGER 1 - Disabled by default, enabled when an ICMP black hole detected 2 - Always enabled, use initial MSS of tcp_base_mss. -tcp_probe_interval - INTEGER +tcp_probe_interval - UNSIGNED INTEGER Controls how often to start TCP Packetization-Layer Path MTU Discovery reprobe. The default is reprobing every 10 minutes as per RFC4821. diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index b92f422f2fa8..891ed2f91467 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -48,6 +48,7 @@ static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; static int comp_sack_nr_max = 255; +static u32 u32_max_div_HZ = UINT_MAX / HZ; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -745,9 +746,10 @@ static struct ctl_table ipv4_net_table[] = { { .procname = "tcp_probe_interval", .data = &init_net.ipv4.sysctl_tcp_probe_interval, - .maxlen = sizeof(int), + .maxlen = sizeof(u32), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_douintvec_minmax, + .extra2 = &u32_max_div_HZ, }, { .procname = "igmp_link_local_mcast_reports", -- cgit v1.2.3 From 1222a16014888ed9733c11e221730d4a8196222b Mon Sep 17 00:00:00 2001 From: Masashi Honma Date: Tue, 25 Sep 2018 11:15:01 +0900 Subject: nl80211: Fix possible Spectre-v1 for CQM RSSI thresholds Use array_index_nospec() to sanitize i with respect to speculation. Note that the user doesn't control i directly, but can make it out of bounds by not finding a threshold in the array. Signed-off-by: Masashi Honma [add note about user control, as explained by Masashi] Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index bd26230de63e..176edfefcbaa 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10231,7 +10231,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; s32 last, low, high; u32 hyst; - int i, n; + int i, n, low_index; int err; /* RSSI reporting disabled? */ @@ -10268,10 +10268,19 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, if (last < wdev->cqm_config->rssi_thresholds[i]) break; - low = i > 0 ? - (wdev->cqm_config->rssi_thresholds[i - 1] - hyst) : S32_MIN; - high = i < n ? - (wdev->cqm_config->rssi_thresholds[i] + hyst - 1) : S32_MAX; + low_index = i - 1; + if (low_index >= 0) { + low_index = array_index_nospec(low_index, n); + low = wdev->cqm_config->rssi_thresholds[low_index] - hyst; + } else { + low = S32_MIN; + } + if (i < n) { + i = array_index_nospec(i, n); + high = wdev->cqm_config->rssi_thresholds[i] + hyst - 1; + } else { + high = S32_MAX; + } return rdev_set_cqm_rssi_range_config(rdev, dev, low, high); } -- cgit v1.2.3 From 659902db7008f233e3d514c774b5ff3d46c8edd8 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Fri, 14 Sep 2018 13:04:37 +0800 Subject: ieee802154: ca8210: remove redundant condition check before debugfs_remove debugfs_remove has taken the IS_ERR into account. Just remove the unnecessary condition. Signed-off-by: zhong jiang Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/ca8210.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index e21279dde85c..0ff5a403a8dc 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -3043,8 +3043,7 @@ static void ca8210_test_interface_clear(struct ca8210_priv *priv) { struct ca8210_test *test = &priv->test; - if (!IS_ERR(test->ca8210_dfs_spi_int)) - debugfs_remove(test->ca8210_dfs_spi_int); + debugfs_remove(test->ca8210_dfs_spi_int); kfifo_free(&test->up_fifo); dev_info(&priv->spi->dev, "Test interface removed\n"); } -- cgit v1.2.3 From 092ffc51fb3f9b8369e737c9320bf0bffb2c898f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:07 +0100 Subject: rxrpc: Remove dup code from rxrpc_find_connection_rcu() rxrpc_find_connection_rcu() initialises variable k twice with the same information. Remove one of the initialisations. Signed-off-by: David Howells --- net/rxrpc/conn_object.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 77440a356b14..1746b48cb165 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -85,9 +85,6 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0) goto not_found; - k.epoch = sp->hdr.epoch; - k.cid = sp->hdr.cid & RXRPC_CIDMASK; - /* We may have to handle mixing IPv4 and IPv6 */ if (srx.transport.family != local->srx.transport.family) { pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n", -- cgit v1.2.3 From d6d1cd2578c4da0764ad334e3411c1c1b1557f58 Mon Sep 17 00:00:00 2001 From: Xue Liu Date: Fri, 31 Aug 2018 23:46:41 +0200 Subject: ieee802154: mcr20a: Replace magic number with constants The combination of defined constants are used to present the state of IRQ so the magic numbers has been replaced. This is a simple coding style change which should have no impact on runtime code execution. Signed-off-by: Xue Liu Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mcr20a.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index e428277781ac..04891429a554 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -903,19 +903,19 @@ mcr20a_irq_clean_complete(void *context) switch (seq_state) { /* TX IRQ, RX IRQ and SEQ IRQ */ - case (0x03): + case (DAR_IRQSTS1_TXIRQ | DAR_IRQSTS1_SEQIRQ): if (lp->is_tx) { lp->is_tx = 0; dev_dbg(printdev(lp), "TX is done. No ACK\n"); mcr20a_handle_tx_complete(lp); } break; - case (0x05): + case (DAR_IRQSTS1_RXIRQ | DAR_IRQSTS1_SEQIRQ): /* rx is starting */ dev_dbg(printdev(lp), "RX is starting\n"); mcr20a_handle_rx(lp); break; - case (0x07): + case (DAR_IRQSTS1_RXIRQ | DAR_IRQSTS1_TXIRQ | DAR_IRQSTS1_SEQIRQ): if (lp->is_tx) { /* tx is done */ lp->is_tx = 0; @@ -927,7 +927,7 @@ mcr20a_irq_clean_complete(void *context) mcr20a_handle_rx(lp); } break; - case (0x01): + case (DAR_IRQSTS1_SEQIRQ): if (lp->is_tx) { dev_dbg(printdev(lp), "TX is starting\n"); mcr20a_handle_tx(lp); -- cgit v1.2.3 From 3baafeffa48a12b3cec9a0b6d4049fba02d53cea Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Tue, 25 Sep 2018 16:56:53 +0300 Subject: iwlwifi: 1000: set the TFD queue size .max_tfd_queue_size was ommited for 1000 card serries leading to oops in swiotlb. Fixes: 7b3e42ea2ead ("iwlwifi: support multiple tfd queue max sizes for different devices") Tested-by: Randy Dunlap Signed-off-by: Pavel Machek Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/cfg/1000.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/1000.c b/drivers/net/wireless/intel/iwlwifi/cfg/1000.c index 591687984962..497fd766d87c 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/1000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/1000.c @@ -51,6 +51,7 @@ static const struct iwl_base_params iwl1000_base_params = { .num_of_queues = IWLAGN_NUM_QUEUES, + .max_tfd_queue_size = 256, .eeprom_size = OTP_LOW_IMAGE_SIZE, .pll_cfg = true, .max_ll_items = OTP_MAX_LL_ITEMS_1000, -- cgit v1.2.3 From dc71db34e4f3c06b8277c8f3c2ff014610607a8c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:08 +0100 Subject: rxrpc: Fix checks as to whether we should set up a new call There's a check in rxrpc_data_ready() that's checking the CLIENT_INITIATED flag in the packet type field rather than in the packet flags field. Fix this by creating a pair of helper functions to check whether the packet is going to the client or to the server and use them generally. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 10 ++++++++++ net/rxrpc/conn_object.c | 2 +- net/rxrpc/input.c | 12 ++++-------- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index c97558710421..9fcb3e197b14 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -463,6 +463,16 @@ struct rxrpc_connection { u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ }; +static inline bool rxrpc_to_server(const struct rxrpc_skb_priv *sp) +{ + return sp->hdr.flags & RXRPC_CLIENT_INITIATED; +} + +static inline bool rxrpc_to_client(const struct rxrpc_skb_priv *sp) +{ + return !rxrpc_to_server(sp); +} + /* * Flags in call->flags. */ diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 1746b48cb165..390ba50cfab4 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -96,7 +96,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, k.epoch = sp->hdr.epoch; k.cid = sp->hdr.cid & RXRPC_CIDMASK; - if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) { + if (rxrpc_to_server(sp)) { /* We need to look up service connections by the full protocol * parameter set. We look up the peer first as an intermediate * step and then the connection from the peer's tree. diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index cfdc199c6351..ec299c627f77 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1177,10 +1177,6 @@ void rxrpc_data_ready(struct sock *udp_sk) trace_rxrpc_rx_packet(sp); - _net("Rx RxRPC %s ep=%x call=%x:%x", - sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient", - sp->hdr.epoch, sp->hdr.cid, sp->hdr.callNumber); - if (sp->hdr.type >= RXRPC_N_PACKET_TYPES || !((RXRPC_SUPPORTED_PACKET_TYPES >> sp->hdr.type) & 1)) { _proto("Rx Bad Packet Type %u", sp->hdr.type); @@ -1189,13 +1185,13 @@ void rxrpc_data_ready(struct sock *udp_sk) switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: - if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED)) + if (rxrpc_to_client(sp)) goto discard; rxrpc_post_packet_to_local(local, skb); goto out; case RXRPC_PACKET_TYPE_BUSY: - if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) + if (rxrpc_to_server(sp)) goto discard; /* Fall through */ @@ -1280,7 +1276,7 @@ void rxrpc_data_ready(struct sock *udp_sk) call = rcu_dereference(chan->call); if (sp->hdr.callNumber > chan->call_id) { - if (!(sp->hdr.flags & RXRPC_CLIENT_INITIATED)) { + if (rxrpc_to_client(sp)) { rcu_read_unlock(); goto reject_packet; } @@ -1303,7 +1299,7 @@ void rxrpc_data_ready(struct sock *udp_sk) } if (!call || atomic_read(&call->usage) == 0) { - if (!(sp->hdr.type & RXRPC_CLIENT_INITIATED) || + if (rxrpc_to_client(sp) || sp->hdr.callNumber == 0 || sp->hdr.type != RXRPC_PACKET_TYPE_DATA) goto bad_message_unlock; -- cgit v1.2.3 From b604dd9883f783a94020d772e4fe03160f455372 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:08 +0100 Subject: rxrpc: Fix RTT gathering Fix RTT information gathering in AF_RXRPC by the following means: (1) Enable Rx timestamping on the transport socket with SO_TIMESTAMPNS. (2) If the sk_buff doesn't have a timestamp set when rxrpc_data_ready() collects it, set it at that point. (3) Allow ACKs to be requested on the last packet of a client call, but not a service call. We need to be careful lest we undo: bf7d620abf22c321208a4da4f435e7af52551a21 Author: David Howells Date: Thu Oct 6 08:11:51 2016 +0100 rxrpc: Don't request an ACK on the last DATA packet of a call's Tx phase but that only really applies to service calls that we're handling, since the client side gets to send the final ACK (or not). (4) When about to transmit an ACK or DATA packet, record the Tx timestamp before only; don't update the timestamp afterwards. (5) Switch the ordering between recording the serial and recording the timestamp to always set the serial number first. The serial number shouldn't be seen referenced by an ACK packet until we've transmitted the packet bearing it - so in the Rx path, we don't need the timestamp until we've checked the serial number. Fixes: cf1a6474f807 ("rxrpc: Add per-peer RTT tracker") Signed-off-by: David Howells --- net/rxrpc/input.c | 8 ++++++-- net/rxrpc/local_object.c | 9 +++++++++ net/rxrpc/output.c | 31 ++++++++++++++++++------------- 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index ec299c627f77..7f9ed3a60b9a 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -622,13 +622,14 @@ static void rxrpc_input_requested_ack(struct rxrpc_call *call, if (!skb) continue; + sent_at = skb->tstamp; + smp_rmb(); /* Read timestamp before serial. */ sp = rxrpc_skb(skb); if (sp->hdr.serial != orig_serial) continue; - smp_rmb(); - sent_at = skb->tstamp; goto found; } + return; found: @@ -1143,6 +1144,9 @@ void rxrpc_data_ready(struct sock *udp_sk) return; } + if (skb->tstamp == 0) + skb->tstamp = ktime_get_real(); + rxrpc_new_skb(skb, rxrpc_skb_rx_received); _net("recv skb %p", skb); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 777c3ed4cfc0..81de7d889ffa 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -173,6 +173,15 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) _debug("setsockopt failed"); goto error; } + + /* We want receive timestamps. */ + opt = 1; + ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS, + (char *)&opt, sizeof(opt)); + if (ret < 0) { + _debug("setsockopt failed"); + goto error; + } break; default: diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index ccf5de160444..8a4da3fe96df 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -124,7 +124,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, struct kvec iov[2]; rxrpc_serial_t serial; rxrpc_seq_t hard_ack, top; - ktime_t now; size_t len, n; int ret; u8 reason; @@ -196,9 +195,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, /* We need to stick a time in before we send the packet in case * the reply gets back before kernel_sendmsg() completes - but * asking UDP to send the packet can take a relatively long - * time, so we update the time after, on the assumption that - * the packet transmission is more likely to happen towards the - * end of the kernel_sendmsg() call. + * time. */ call->ping_time = ktime_get_real(); set_bit(RXRPC_CALL_PINGING, &call->flags); @@ -206,9 +203,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, } ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); - now = ktime_get_real(); - if (ping) - call->ping_time = now; conn->params.peer->last_tx_at = ktime_get_seconds(); if (ret < 0) trace_rxrpc_tx_fail(call->debug_id, serial, ret, @@ -363,8 +357,14 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, /* If our RTT cache needs working on, request an ACK. Also request * ACKs if a DATA packet appears to have been lost. + * + * However, we mustn't request an ACK on the last reply packet of a + * service call, lest OpenAFS incorrectly send us an ACK with some + * soft-ACKs in it and then never follow up with a proper hard ACK. */ - if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && + if ((!(sp->hdr.flags & RXRPC_LAST_PACKET) || + rxrpc_to_server(sp) + ) && (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || retrans || call->cong_mode == RXRPC_CALL_SLOW_START || @@ -390,6 +390,11 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, goto send_fragmentable; down_read(&conn->params.local->defrag_sem); + + sp->hdr.serial = serial; + smp_wmb(); /* Set serial before timestamp */ + skb->tstamp = ktime_get_real(); + /* send the packet by UDP * - returns -EMSGSIZE if UDP would have to fragment the packet * to go out of the interface @@ -413,12 +418,8 @@ done: trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, retrans, lost); if (ret >= 0) { - ktime_t now = ktime_get_real(); - skb->tstamp = now; - smp_wmb(); - sp->hdr.serial = serial; if (whdr.flags & RXRPC_REQUEST_ACK) { - call->peer->rtt_last_req = now; + call->peer->rtt_last_req = skb->tstamp; trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); if (call->peer->rtt_usage > 1) { unsigned long nowj = jiffies, ack_lost_at; @@ -457,6 +458,10 @@ send_fragmentable: down_write(&conn->params.local->defrag_sem); + sp->hdr.serial = serial; + smp_wmb(); /* Set serial before timestamp */ + skb->tstamp = ktime_get_real(); + switch (conn->params.local->srx.transport.family) { case AF_INET: opt = IP_PMTUDISC_DONT; -- cgit v1.2.3 From ece64fec164f523bfbe874abdef2a0e6ff376251 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:08 +0100 Subject: rxrpc: Emit BUSY packets when supposed to rather than ABORTs In the input path, a received sk_buff can be marked for rejection by setting RXRPC_SKB_MARK_* in skb->mark and, if needed, some auxiliary data (such as an abort code) in skb->priority. The rejection is handled by queueing the sk_buff up for dealing with in process context. The output code reads the mark and priority and, theoretically, generates an appropriate response packet. However, if RXRPC_SKB_MARK_BUSY is set, this isn't noticed and an ABORT message with a random abort code is generated (since skb->priority wasn't set to anything). Fix this by outputting the appropriate sort of packet. Also, whilst we're at it, most of the marks are no longer used, so remove them and rename the remaining two to something more obvious. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 13 ++++--------- net/rxrpc/call_accept.c | 6 +++--- net/rxrpc/input.c | 2 +- net/rxrpc/output.c | 23 ++++++++++++++++++----- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9fcb3e197b14..e8861cb78070 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -40,17 +40,12 @@ struct rxrpc_crypt { struct rxrpc_connection; /* - * Mark applied to socket buffers. + * Mark applied to socket buffers in skb->mark. skb->priority is used + * to pass supplementary information. */ enum rxrpc_skb_mark { - RXRPC_SKB_MARK_DATA, /* data message */ - RXRPC_SKB_MARK_FINAL_ACK, /* final ACK received message */ - RXRPC_SKB_MARK_BUSY, /* server busy message */ - RXRPC_SKB_MARK_REMOTE_ABORT, /* remote abort message */ - RXRPC_SKB_MARK_LOCAL_ABORT, /* local abort message */ - RXRPC_SKB_MARK_NET_ERROR, /* network error message */ - RXRPC_SKB_MARK_LOCAL_ERROR, /* local error message */ - RXRPC_SKB_MARK_NEW_CALL, /* local error message */ + RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */ + RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */ }; /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 9d1e298b784c..e88f131c1d7f 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -353,7 +353,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, EOPNOTSUPP); - skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; skb->priority = RX_INVALID_OPERATION; _leave(" = NULL [service]"); return NULL; @@ -364,7 +364,7 @@ found_service: rx->sk.sk_state == RXRPC_CLOSE) { trace_rxrpc_abort(0, "CLS", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN); - skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; skb->priority = RX_INVALID_OPERATION; _leave(" = NULL [close]"); call = NULL; @@ -373,7 +373,7 @@ found_service: call = rxrpc_alloc_incoming_call(rx, local, conn, skb); if (!call) { - skb->mark = RXRPC_SKB_MARK_BUSY; + skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; _leave(" = NULL [busy]"); call = NULL; goto out; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 7f9ed3a60b9a..b0f12471f5e7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1354,7 +1354,7 @@ bad_message: protocol_error: skb->priority = RX_PROTOCOL_ERROR; post_abort: - skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; reject_packet: trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_reject_packet(local, skb); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 8a4da3fe96df..e8fb8922bca8 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -524,7 +524,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) struct kvec iov[2]; size_t size; __be32 code; - int ret; + int ret, ioc; _enter("%d", local->debug_id); @@ -532,7 +532,6 @@ void rxrpc_reject_packets(struct rxrpc_local *local) iov[0].iov_len = sizeof(whdr); iov[1].iov_base = &code; iov[1].iov_len = sizeof(code); - size = sizeof(whdr) + sizeof(code); msg.msg_name = &srx.transport; msg.msg_control = NULL; @@ -540,17 +539,31 @@ void rxrpc_reject_packets(struct rxrpc_local *local) msg.msg_flags = 0; memset(&whdr, 0, sizeof(whdr)); - whdr.type = RXRPC_PACKET_TYPE_ABORT; while ((skb = skb_dequeue(&local->reject_queue))) { rxrpc_see_skb(skb, rxrpc_skb_rx_seen); sp = rxrpc_skb(skb); + switch (skb->mark) { + case RXRPC_SKB_MARK_REJECT_BUSY: + whdr.type = RXRPC_PACKET_TYPE_BUSY; + size = sizeof(whdr); + ioc = 1; + break; + case RXRPC_SKB_MARK_REJECT_ABORT: + whdr.type = RXRPC_PACKET_TYPE_ABORT; + code = htonl(skb->priority); + size = sizeof(whdr) + sizeof(code); + ioc = 2; + break; + default: + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + continue; + } + if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) { msg.msg_namelen = srx.transport_len; - code = htonl(skb->priority); - whdr.epoch = htonl(sp->hdr.epoch); whdr.cid = htonl(sp->hdr.cid); whdr.callNumber = htonl(sp->hdr.callNumber); -- cgit v1.2.3 From 403fc2a138457f1071b186786a7589ef7382c8bc Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:08 +0100 Subject: rxrpc: Improve up-front incoming packet checking Do more up-front checking on incoming packets to weed out invalid ones and also ones aimed at services that we don't support. Whilst we're at it, replace the clearing of call and skew if we don't find a connection with just initialising the variables to zero at the top of the function. Signed-off-by: David Howells --- net/rxrpc/input.c | 63 +++++++++++++++++++++++++++++++++++++++++----------- net/rxrpc/protocol.h | 15 ------------- 2 files changed, 50 insertions(+), 28 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index b0f12471f5e7..a569e9e010d1 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1125,12 +1125,13 @@ void rxrpc_data_ready(struct sock *udp_sk) { struct rxrpc_connection *conn; struct rxrpc_channel *chan; - struct rxrpc_call *call; + struct rxrpc_call *call = NULL; struct rxrpc_skb_priv *sp; struct rxrpc_local *local = udp_sk->sk_user_data; + struct rxrpc_sock *rx; struct sk_buff *skb; unsigned int channel; - int ret, skew; + int ret, skew = 0; _enter("%p", udp_sk); @@ -1181,12 +1182,6 @@ void rxrpc_data_ready(struct sock *udp_sk) trace_rxrpc_rx_packet(sp); - if (sp->hdr.type >= RXRPC_N_PACKET_TYPES || - !((RXRPC_SUPPORTED_PACKET_TYPES >> sp->hdr.type) & 1)) { - _proto("Rx Bad Packet Type %u", sp->hdr.type); - goto bad_message; - } - switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: if (rxrpc_to_client(sp)) @@ -1198,24 +1193,63 @@ void rxrpc_data_ready(struct sock *udp_sk) if (rxrpc_to_server(sp)) goto discard; /* Fall through */ + case RXRPC_PACKET_TYPE_ACK: + case RXRPC_PACKET_TYPE_ACKALL: + if (sp->hdr.callNumber == 0) + goto bad_message; + /* Fall through */ + case RXRPC_PACKET_TYPE_ABORT: + break; case RXRPC_PACKET_TYPE_DATA: - if (sp->hdr.callNumber == 0) + if (sp->hdr.callNumber == 0 || + sp->hdr.seq == 0) goto bad_message; if (sp->hdr.flags & RXRPC_JUMBO_PACKET && !rxrpc_validate_jumbo(skb)) goto bad_message; break; + case RXRPC_PACKET_TYPE_CHALLENGE: + if (rxrpc_to_server(sp)) + goto discard; + break; + case RXRPC_PACKET_TYPE_RESPONSE: + if (rxrpc_to_client(sp)) + goto discard; + break; + /* Packet types 9-11 should just be ignored. */ case RXRPC_PACKET_TYPE_PARAMS: case RXRPC_PACKET_TYPE_10: case RXRPC_PACKET_TYPE_11: goto discard; + + default: + _proto("Rx Bad Packet Type %u", sp->hdr.type); + goto bad_message; } + if (sp->hdr.serviceId == 0) + goto bad_message; + rcu_read_lock(); + if (rxrpc_to_server(sp)) { + /* Weed out packets to services we're not offering. Packets + * that would begin a call are explicitly rejected and the rest + * are just discarded. + */ + rx = rcu_dereference(local->service); + if (!rx || (sp->hdr.serviceId != rx->srx.srx_service && + sp->hdr.serviceId != rx->second_service)) { + if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && + sp->hdr.seq == 1) + goto unsupported_service; + goto discard_unlock; + } + } + conn = rxrpc_find_connection_rcu(local, skb); if (conn) { if (sp->hdr.securityIndex != conn->security_ix) @@ -1297,14 +1331,10 @@ void rxrpc_data_ready(struct sock *udp_sk) if (!test_bit(RXRPC_CALL_RX_HEARD, &call->flags)) set_bit(RXRPC_CALL_RX_HEARD, &call->flags); } - } else { - skew = 0; - call = NULL; } if (!call || atomic_read(&call->usage) == 0) { if (rxrpc_to_client(sp) || - sp->hdr.callNumber == 0 || sp->hdr.type != RXRPC_PACKET_TYPE_DATA) goto bad_message_unlock; if (sp->hdr.seq != 1) @@ -1340,6 +1370,13 @@ wrong_security: skb->priority = RXKADINCONSISTENCY; goto post_abort; +unsupported_service: + rcu_read_unlock(); + trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_INVALID_OPERATION, EOPNOTSUPP); + skb->priority = RX_INVALID_OPERATION; + goto post_abort; + reupgrade: rcu_read_unlock(); trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h index 93da73bf7098..f9cb83c938f3 100644 --- a/net/rxrpc/protocol.h +++ b/net/rxrpc/protocol.h @@ -50,7 +50,6 @@ struct rxrpc_wire_header { #define RXRPC_PACKET_TYPE_10 10 /* Ignored */ #define RXRPC_PACKET_TYPE_11 11 /* Ignored */ #define RXRPC_PACKET_TYPE_VERSION 13 /* version string request */ -#define RXRPC_N_PACKET_TYPES 14 /* number of packet types (incl type 0) */ uint8_t flags; /* packet flags */ #define RXRPC_CLIENT_INITIATED 0x01 /* signifies a packet generated by a client */ @@ -72,20 +71,6 @@ struct rxrpc_wire_header { } __packed; -#define RXRPC_SUPPORTED_PACKET_TYPES ( \ - (1 << RXRPC_PACKET_TYPE_DATA) | \ - (1 << RXRPC_PACKET_TYPE_ACK) | \ - (1 << RXRPC_PACKET_TYPE_BUSY) | \ - (1 << RXRPC_PACKET_TYPE_ABORT) | \ - (1 << RXRPC_PACKET_TYPE_ACKALL) | \ - (1 << RXRPC_PACKET_TYPE_CHALLENGE) | \ - (1 << RXRPC_PACKET_TYPE_RESPONSE) | \ - /*(1 << RXRPC_PACKET_TYPE_DEBUG) | */ \ - (1 << RXRPC_PACKET_TYPE_PARAMS) | \ - (1 << RXRPC_PACKET_TYPE_10) | \ - (1 << RXRPC_PACKET_TYPE_11) | \ - (1 << RXRPC_PACKET_TYPE_VERSION)) - /*****************************************************************************/ /* * jumbo packet secondary header -- cgit v1.2.3 From 0099dc589bfa7caf6f2608c4cbc1181cfee22b0c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:09 +0100 Subject: rxrpc: Make service call handling more robust Make the following changes to improve the robustness of the code that sets up a new service call: (1) Cache the rxrpc_sock struct obtained in rxrpc_data_ready() to do a service ID check and pass that along to rxrpc_new_incoming_call(). This means that I can remove the check from rxrpc_new_incoming_call() without the need to worry about the socket attached to the local endpoint getting replaced - which would invalidate the check. (2) Cache the rxrpc_peer struct, thereby allowing the peer search to be done once. The peer is passed to rxrpc_new_incoming_call(), thereby saving the need to repeat the search. This also reduces the possibility of rxrpc_publish_service_conn() BUG()'ing due to the detection of a duplicate connection, despite the initial search done by rxrpc_find_connection_rcu() having turned up nothing. This BUG() shouldn't ever get hit since rxrpc_data_ready() *should* be non-reentrant and the result of the initial search should still hold true, but it has proven possible to hit. I *think* this may be due to __rxrpc_lookup_peer_rcu() cutting short the iteration over the hash table if it finds a matching peer with a zero usage count, but I don't know for sure since it's only ever been hit once that I know of. Another possibility is that a bug in rxrpc_data_ready() that checked the wrong byte in the header for the RXRPC_CLIENT_INITIATED flag might've let through a packet that caused a spurious and invalid call to be set up. That is addressed in another patch. (3) Fix __rxrpc_lookup_peer_rcu() to skip peer records that have a zero usage count rather than stopping and returning not found, just in case there's another peer record behind it in the bucket. (4) Don't search the peer records in rxrpc_alloc_incoming_call(), but rather either use the peer cached in (2) or, if one wasn't found, preemptively install a new one. Fixes: 8496af50eb38 ("rxrpc: Use RCU to access a peer's service connection tree") Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 8 +++++--- net/rxrpc/call_accept.c | 41 ++++++++++++----------------------------- net/rxrpc/conn_object.c | 7 ++++++- net/rxrpc/input.c | 7 ++++--- net/rxrpc/peer_object.c | 35 +++++++++++------------------------ 5 files changed, 38 insertions(+), 60 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e8861cb78070..c72686193d83 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -722,6 +722,8 @@ extern struct workqueue_struct *rxrpc_workqueue; int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); void rxrpc_discard_prealloc(struct rxrpc_sock *); struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, + struct rxrpc_sock *, + struct rxrpc_peer *, struct rxrpc_connection *, struct sk_buff *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); @@ -913,7 +915,8 @@ extern unsigned int rxrpc_closed_conn_expiry; struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, - struct sk_buff *); + struct sk_buff *, + struct rxrpc_peer **); void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *); void rxrpc_disconnect_call(struct rxrpc_call *); void rxrpc_kill_connection(struct rxrpc_connection *); @@ -1049,8 +1052,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *, struct sockaddr_rxrpc *, gfp_t); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t); -struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *, - struct rxrpc_peer *); +void rxrpc_new_incoming_peer(struct rxrpc_local *, struct rxrpc_peer *); void rxrpc_destroy_all_peers(struct rxrpc_net *); struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *); struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index e88f131c1d7f..9c7f26d06a52 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -249,11 +249,11 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) */ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, struct rxrpc_local *local, + struct rxrpc_peer *peer, struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_backlog *b = rx->backlog; - struct rxrpc_peer *peer, *xpeer; struct rxrpc_call *call; unsigned short call_head, conn_head, peer_head; unsigned short call_tail, conn_tail, peer_tail; @@ -276,21 +276,18 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, return NULL; if (!conn) { - /* No connection. We're going to need a peer to start off - * with. If one doesn't yet exist, use a spare from the - * preallocation set. We dump the address into the spare in - * anticipation - and to save on stack space. - */ - xpeer = b->peer_backlog[peer_tail]; - if (rxrpc_extract_addr_from_skb(local, &xpeer->srx, skb) < 0) - return NULL; - - peer = rxrpc_lookup_incoming_peer(local, xpeer); - if (peer == xpeer) { + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (!peer) { + peer = b->peer_backlog[peer_tail]; + if (rxrpc_extract_addr_from_skb(local, &peer->srx, skb) < 0) + return NULL; b->peer_backlog[peer_tail] = NULL; smp_store_release(&b->peer_backlog_tail, (peer_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); + + rxrpc_new_incoming_peer(local, peer); } /* Now allocate and set up the connection */ @@ -335,30 +332,16 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, * The call is returned with the user access mutex held. */ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, + struct rxrpc_sock *rx, + struct rxrpc_peer *peer, struct rxrpc_connection *conn, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_sock *rx; struct rxrpc_call *call; - u16 service_id = sp->hdr.serviceId; _enter(""); - /* Get the socket providing the service */ - rx = rcu_dereference(local->service); - if (rx && (service_id == rx->srx.srx_service || - service_id == rx->second_service)) - goto found_service; - - trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_INVALID_OPERATION, EOPNOTSUPP); - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; - skb->priority = RX_INVALID_OPERATION; - _leave(" = NULL [service]"); - return NULL; - -found_service: spin_lock(&rx->incoming_lock); if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED || rx->sk.sk_state == RXRPC_CLOSE) { @@ -371,7 +354,7 @@ found_service: goto out; } - call = rxrpc_alloc_incoming_call(rx, local, conn, skb); + call = rxrpc_alloc_incoming_call(rx, local, peer, conn, skb); if (!call) { skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; _leave(" = NULL [busy]"); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 390ba50cfab4..b4438f98dc5c 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -69,10 +69,14 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) * If successful, a pointer to the connection is returned, but no ref is taken. * NULL is returned if there is no match. * + * When searching for a service call, if we find a peer but no connection, we + * return that through *_peer in case we need to create a new service call. + * * The caller must be holding the RCU read lock. */ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, - struct sk_buff *skb) + struct sk_buff *skb, + struct rxrpc_peer **_peer) { struct rxrpc_connection *conn; struct rxrpc_conn_proto k; @@ -104,6 +108,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, peer = rxrpc_lookup_peer_rcu(local, &srx); if (!peer) goto not_found; + *_peer = peer; conn = rxrpc_find_service_conn_rcu(peer, skb); if (!conn || atomic_read(&conn->usage) == 0) goto not_found; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index a569e9e010d1..800f5b8a1baa 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1128,7 +1128,8 @@ void rxrpc_data_ready(struct sock *udp_sk) struct rxrpc_call *call = NULL; struct rxrpc_skb_priv *sp; struct rxrpc_local *local = udp_sk->sk_user_data; - struct rxrpc_sock *rx; + struct rxrpc_peer *peer = NULL; + struct rxrpc_sock *rx = NULL; struct sk_buff *skb; unsigned int channel; int ret, skew = 0; @@ -1250,7 +1251,7 @@ void rxrpc_data_ready(struct sock *udp_sk) } } - conn = rxrpc_find_connection_rcu(local, skb); + conn = rxrpc_find_connection_rcu(local, skb, &peer); if (conn) { if (sp->hdr.securityIndex != conn->security_ix) goto wrong_security; @@ -1339,7 +1340,7 @@ void rxrpc_data_ready(struct sock *udp_sk) goto bad_message_unlock; if (sp->hdr.seq != 1) goto discard_unlock; - call = rxrpc_new_incoming_call(local, conn, skb); + call = rxrpc_new_incoming_call(local, rx, peer, conn, skb); if (!call) { rcu_read_unlock(); goto reject_packet; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 1dc7648e3eff..70083e8fb6e5 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -124,11 +124,9 @@ static struct rxrpc_peer *__rxrpc_lookup_peer_rcu( struct rxrpc_net *rxnet = local->rxnet; hash_for_each_possible_rcu(rxnet->peer_hash, peer, hash_link, hash_key) { - if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0) { - if (atomic_read(&peer->usage) == 0) - return NULL; + if (rxrpc_peer_cmp_key(peer, local, srx, hash_key) == 0 && + atomic_read(&peer->usage) > 0) return peer; - } } return NULL; @@ -299,34 +297,23 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, } /* - * Set up a new incoming peer. The address is prestored in the preallocated - * peer. + * Set up a new incoming peer. There shouldn't be any other matching peers + * since we've already done a search in the list from the non-reentrant context + * (the data_ready handler) that is the only place we can add new peers. */ -struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local, - struct rxrpc_peer *prealloc) +void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer) { - struct rxrpc_peer *peer; struct rxrpc_net *rxnet = local->rxnet; unsigned long hash_key; - hash_key = rxrpc_peer_hash_key(local, &prealloc->srx); - prealloc->local = local; - rxrpc_init_peer(prealloc, hash_key); + hash_key = rxrpc_peer_hash_key(local, &peer->srx); + peer->local = local; + rxrpc_init_peer(peer, hash_key); spin_lock(&rxnet->peer_hash_lock); - - /* Need to check that we aren't racing with someone else */ - peer = __rxrpc_lookup_peer_rcu(local, &prealloc->srx, hash_key); - if (peer && !rxrpc_get_peer_maybe(peer)) - peer = NULL; - if (!peer) { - peer = prealloc; - hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); - list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); - } - + hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); + list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); spin_unlock(&rxnet->peer_hash_lock); - return peer; } /* -- cgit v1.2.3 From 37a675e768d7606fe8a53e0c459c9b53e121ac20 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:09 +0100 Subject: rxrpc: Fix transport sockopts to get IPv4 errors on an IPv6 socket It seems that enabling IPV6_RECVERR on an IPv6 socket doesn't also turn on IP_RECVERR, so neither local errors nor ICMP-transported remote errors from IPv4 peer addresses are returned to the AF_RXRPC protocol. Make the sockopt setting code in rxrpc_open_socket() fall through from the AF_INET6 case to the AF_INET case to turn on all the AF_INET options too in the AF_INET6 case. Fixes: f2aeed3a591f ("rxrpc: Fix error reception on AF_INET6 sockets") Signed-off-by: David Howells --- net/rxrpc/local_object.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 81de7d889ffa..94d234e9c685 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -135,10 +135,10 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } switch (local->srx.transport.family) { - case AF_INET: - /* we want to receive ICMP errors */ + case AF_INET6: + /* we want to receive ICMPv6 errors */ opt = 1; - ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR, + ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR, (char *) &opt, sizeof(opt)); if (ret < 0) { _debug("setsockopt failed"); @@ -146,19 +146,22 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } /* we want to set the don't fragment bit */ - opt = IP_PMTUDISC_DO; - ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER, + opt = IPV6_PMTUDISC_DO; + ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER, (char *) &opt, sizeof(opt)); if (ret < 0) { _debug("setsockopt failed"); goto error; } - break; - case AF_INET6: + /* Fall through and set IPv4 options too otherwise we don't get + * errors from IPv4 packets sent through the IPv6 socket. + */ + + case AF_INET: /* we want to receive ICMP errors */ opt = 1; - ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR, + ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR, (char *) &opt, sizeof(opt)); if (ret < 0) { _debug("setsockopt failed"); @@ -166,8 +169,8 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } /* we want to set the don't fragment bit */ - opt = IPV6_PMTUDISC_DO; - ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER, + opt = IP_PMTUDISC_DO; + ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER, (char *) &opt, sizeof(opt)); if (ret < 0) { _debug("setsockopt failed"); -- cgit v1.2.3 From f334430316e7fd37c4821ebec627e27714bb5d76 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Sep 2018 15:13:09 +0100 Subject: rxrpc: Fix error distribution Fix error distribution by immediately delivering the errors to all the affected calls rather than deferring them to a worker thread. The problem with the latter is that retries and things can happen in the meantime when we want to stop that sooner. To this end: (1) Stop the error distributor from removing calls from the error_targets list so that peer->lock isn't needed to synchronise against other adds and removals. (2) Require the peer's error_targets list to be accessed with RCU, thereby avoiding the need to take peer->lock over distribution. (3) Don't attempt to affect a call's state if it is already marked complete. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 4 +--- net/rxrpc/ar-internal.h | 5 ----- net/rxrpc/call_object.c | 2 +- net/rxrpc/conn_client.c | 4 ++-- net/rxrpc/conn_object.c | 2 +- net/rxrpc/peer_event.c | 46 +++++++++++--------------------------------- net/rxrpc/peer_object.c | 17 ---------------- 7 files changed, 16 insertions(+), 64 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 196587b8f204..837393fa897b 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -56,7 +56,6 @@ enum rxrpc_peer_trace { rxrpc_peer_new, rxrpc_peer_processing, rxrpc_peer_put, - rxrpc_peer_queued_error, }; enum rxrpc_conn_trace { @@ -257,8 +256,7 @@ enum rxrpc_tx_point { EM(rxrpc_peer_got, "GOT") \ EM(rxrpc_peer_new, "NEW") \ EM(rxrpc_peer_processing, "PRO") \ - EM(rxrpc_peer_put, "PUT") \ - E_(rxrpc_peer_queued_error, "QER") + E_(rxrpc_peer_put, "PUT") #define rxrpc_conn_traces \ EM(rxrpc_conn_got, "GOT") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index c72686193d83..ef9554131434 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -288,7 +288,6 @@ struct rxrpc_peer { struct hlist_node hash_link; struct rxrpc_local *local; struct hlist_head error_targets; /* targets for net error distribution */ - struct work_struct error_distributor; struct rb_root service_conns; /* Service connections */ struct list_head keepalive_link; /* Link in net->peer_keepalive[] */ time64_t last_tx_at; /* Last time packet sent here */ @@ -299,8 +298,6 @@ struct rxrpc_peer { unsigned int maxdata; /* data size (MTU - hdrsize) */ unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */ int debug_id; /* debug ID for printks */ - int error_report; /* Net (+0) or local (+1000000) to distribute */ -#define RXRPC_LOCAL_ERROR_OFFSET 1000000 struct sockaddr_rxrpc srx; /* remote address */ /* calculated RTT cache */ @@ -1039,7 +1036,6 @@ void rxrpc_send_keepalive(struct rxrpc_peer *); * peer_event.c */ void rxrpc_error_report(struct sock *); -void rxrpc_peer_error_distributor(struct work_struct *); void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); void rxrpc_peer_keepalive_worker(struct work_struct *); @@ -1057,7 +1053,6 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *); struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *); struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *); void rxrpc_put_peer(struct rxrpc_peer *); -void __rxrpc_queue_peer_error(struct rxrpc_peer *); /* * proc.c diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 9486293fef5c..799f75b6900d 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -400,7 +400,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, rcu_assign_pointer(conn->channels[chan].call, call); spin_lock(&conn->params.peer->lock); - hlist_add_head(&call->error_link, &conn->params.peer->error_targets); + hlist_add_head_rcu(&call->error_link, &conn->params.peer->error_targets); spin_unlock(&conn->params.peer->lock); _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index f8f37188a932..8acf74fe24c0 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -710,8 +710,8 @@ int rxrpc_connect_call(struct rxrpc_call *call, } spin_lock_bh(&call->conn->params.peer->lock); - hlist_add_head(&call->error_link, - &call->conn->params.peer->error_targets); + hlist_add_head_rcu(&call->error_link, + &call->conn->params.peer->error_targets); spin_unlock_bh(&call->conn->params.peer->lock); out: diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index b4438f98dc5c..885dae829f4a 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -216,7 +216,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) call->peer->cong_cwnd = call->cong_cwnd; spin_lock_bh(&conn->params.peer->lock); - hlist_del_init(&call->error_link); + hlist_del_rcu(&call->error_link); spin_unlock_bh(&conn->params.peer->lock); if (rxrpc_is_client_call(call)) diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 4f9da2f51c69..f3e6fc670da2 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -23,6 +23,8 @@ #include "ar-internal.h" static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *); +static void rxrpc_distribute_error(struct rxrpc_peer *, int, + enum rxrpc_call_completion); /* * Find the peer associated with an ICMP packet. @@ -194,8 +196,6 @@ void rxrpc_error_report(struct sock *sk) rcu_read_unlock(); rxrpc_free_skb(skb, rxrpc_skb_rx_freed); - /* The ref we obtained is passed off to the work item */ - __rxrpc_queue_peer_error(peer); _leave(""); } @@ -205,6 +205,7 @@ void rxrpc_error_report(struct sock *sk) static void rxrpc_store_error(struct rxrpc_peer *peer, struct sock_exterr_skb *serr) { + enum rxrpc_call_completion compl = RXRPC_CALL_NETWORK_ERROR; struct sock_extended_err *ee; int err; @@ -255,7 +256,7 @@ static void rxrpc_store_error(struct rxrpc_peer *peer, case SO_EE_ORIGIN_NONE: case SO_EE_ORIGIN_LOCAL: _proto("Rx Received local error { error=%d }", err); - err += RXRPC_LOCAL_ERROR_OFFSET; + compl = RXRPC_CALL_LOCAL_ERROR; break; case SO_EE_ORIGIN_ICMP6: @@ -264,48 +265,23 @@ static void rxrpc_store_error(struct rxrpc_peer *peer, break; } - peer->error_report = err; + rxrpc_distribute_error(peer, err, compl); } /* - * Distribute an error that occurred on a peer + * Distribute an error that occurred on a peer. */ -void rxrpc_peer_error_distributor(struct work_struct *work) +static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error, + enum rxrpc_call_completion compl) { - struct rxrpc_peer *peer = - container_of(work, struct rxrpc_peer, error_distributor); struct rxrpc_call *call; - enum rxrpc_call_completion compl; - int error; - - _enter(""); - - error = READ_ONCE(peer->error_report); - if (error < RXRPC_LOCAL_ERROR_OFFSET) { - compl = RXRPC_CALL_NETWORK_ERROR; - } else { - compl = RXRPC_CALL_LOCAL_ERROR; - error -= RXRPC_LOCAL_ERROR_OFFSET; - } - _debug("ISSUE ERROR %s %d", rxrpc_call_completions[compl], error); - - spin_lock_bh(&peer->lock); - - while (!hlist_empty(&peer->error_targets)) { - call = hlist_entry(peer->error_targets.first, - struct rxrpc_call, error_link); - hlist_del_init(&call->error_link); + hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) { rxrpc_see_call(call); - - if (rxrpc_set_call_completion(call, compl, 0, -error)) + if (call->state < RXRPC_CALL_COMPLETE && + rxrpc_set_call_completion(call, compl, 0, -error)) rxrpc_notify_socket(call); } - - spin_unlock_bh(&peer->lock); - - rxrpc_put_peer(peer); - _leave(""); } /* diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 70083e8fb6e5..01a9febfa367 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -220,8 +220,6 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) atomic_set(&peer->usage, 1); peer->local = local; INIT_HLIST_HEAD(&peer->error_targets); - INIT_WORK(&peer->error_distributor, - &rxrpc_peer_error_distributor); peer->service_conns = RB_ROOT; seqlock_init(&peer->service_conn_lock); spin_lock_init(&peer->lock); @@ -402,21 +400,6 @@ struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer) return peer; } -/* - * Queue a peer record. This passes the caller's ref to the workqueue. - */ -void __rxrpc_queue_peer_error(struct rxrpc_peer *peer) -{ - const void *here = __builtin_return_address(0); - int n; - - n = atomic_read(&peer->usage); - if (rxrpc_queue_work(&peer->error_distributor)) - trace_rxrpc_peer(peer, rxrpc_peer_queued_error, n, here); - else - rxrpc_put_peer(peer); -} - /* * Discard a peer record. */ -- cgit v1.2.3 From a13f814a67b12a2f29d1decf4b4f4e700658a517 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 30 Aug 2018 17:56:52 +0900 Subject: netfilter: nft_set_rbtree: add missing rb_erase() in GC routine The nft_set_gc_batch_check() checks whether gc buffer is full. If gc buffer is full, gc buffer is released by the nft_set_gc_batch_complete() internally. In case of rbtree, the rb_erase() should be called before calling the nft_set_gc_batch_complete(). therefore the rb_erase() should be called before calling the nft_set_gc_batch_check() too. test commands: table ip filter { set set1 { type ipv4_addr; flags interval, timeout; gc-interval 10s; timeout 1s; elements = { 1-2, 3-4, 5-6, ... 10000-10001, } } } %nft -f test.nft splat looks like: [ 430.273885] kasan: GPF could be caused by NULL-ptr deref or user memory access [ 430.282158] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 430.283116] CPU: 1 PID: 190 Comm: kworker/1:2 Tainted: G B 4.18.0+ #7 [ 430.283116] Workqueue: events_power_efficient nft_rbtree_gc [nf_tables_set] [ 430.313559] RIP: 0010:rb_next+0x81/0x130 [ 430.313559] Code: 08 49 bd 00 00 00 00 00 fc ff df 48 bb 00 00 00 00 00 fc ff df 48 85 c0 75 05 eb 58 48 89 d4 [ 430.313559] RSP: 0018:ffff88010cdb7680 EFLAGS: 00010207 [ 430.313559] RAX: 0000000000b84854 RBX: dffffc0000000000 RCX: ffffffff83f01973 [ 430.313559] RDX: 000000000017090c RSI: 0000000000000008 RDI: 0000000000b84864 [ 430.313559] RBP: ffff8801060d4588 R08: fffffbfff09bc349 R09: fffffbfff09bc349 [ 430.313559] R10: 0000000000000001 R11: fffffbfff09bc348 R12: ffff880100f081a8 [ 430.313559] R13: dffffc0000000000 R14: ffff880100ff8688 R15: dffffc0000000000 [ 430.313559] FS: 0000000000000000(0000) GS:ffff88011b400000(0000) knlGS:0000000000000000 [ 430.313559] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 430.313559] CR2: 0000000001551008 CR3: 000000005dc16000 CR4: 00000000001006e0 [ 430.313559] Call Trace: [ 430.313559] nft_rbtree_gc+0x112/0x5c0 [nf_tables_set] [ 430.313559] process_one_work+0xc13/0x1ec0 [ 430.313559] ? _raw_spin_unlock_irq+0x29/0x40 [ 430.313559] ? pwq_dec_nr_in_flight+0x3c0/0x3c0 [ 430.313559] ? set_load_weight+0x270/0x270 [ 430.313559] ? __switch_to_asm+0x34/0x70 [ 430.313559] ? __switch_to_asm+0x40/0x70 [ 430.313559] ? __switch_to_asm+0x34/0x70 [ 430.313559] ? __switch_to_asm+0x34/0x70 [ 430.313559] ? __switch_to_asm+0x40/0x70 [ 430.313559] ? __switch_to_asm+0x34/0x70 [ 430.313559] ? __switch_to_asm+0x40/0x70 [ 430.313559] ? __switch_to_asm+0x34/0x70 [ 430.313559] ? __switch_to_asm+0x34/0x70 [ 430.313559] ? __switch_to_asm+0x40/0x70 [ 430.313559] ? __switch_to_asm+0x34/0x70 [ 430.313559] ? __schedule+0x6d3/0x1f50 [ 430.313559] ? find_held_lock+0x39/0x1c0 [ 430.313559] ? __sched_text_start+0x8/0x8 [ 430.313559] ? cyc2ns_read_end+0x10/0x10 [ 430.313559] ? save_trace+0x300/0x300 [ 430.313559] ? sched_clock_local+0xd4/0x140 [ 430.313559] ? find_held_lock+0x39/0x1c0 [ 430.313559] ? worker_thread+0x353/0x1120 [ 430.313559] ? worker_thread+0x353/0x1120 [ 430.313559] ? lock_contended+0xe70/0xe70 [ 430.313559] ? __lock_acquire+0x4500/0x4500 [ 430.535635] ? do_raw_spin_unlock+0xa5/0x330 [ 430.535635] ? do_raw_spin_trylock+0x101/0x1a0 [ 430.535635] ? do_raw_spin_lock+0x1f0/0x1f0 [ 430.535635] ? _raw_spin_lock_irq+0x10/0x70 [ 430.535635] worker_thread+0x15d/0x1120 [ ... ] Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 55e2d9215c0d..0e5ec126f6ad 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -355,12 +355,11 @@ cont: static void nft_rbtree_gc(struct work_struct *work) { + struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL; struct nft_set_gc_batch *gcb = NULL; - struct rb_node *node, *prev = NULL; - struct nft_rbtree_elem *rbe; struct nft_rbtree *priv; + struct rb_node *node; struct nft_set *set; - int i; priv = container_of(work, struct nft_rbtree, gc_work.work); set = nft_set_container_of(priv); @@ -371,7 +370,7 @@ static void nft_rbtree_gc(struct work_struct *work) rbe = rb_entry(node, struct nft_rbtree_elem, node); if (nft_rbtree_interval_end(rbe)) { - prev = node; + rbe_end = rbe; continue; } if (!nft_set_elem_expired(&rbe->ext)) @@ -379,29 +378,30 @@ static void nft_rbtree_gc(struct work_struct *work) if (nft_set_elem_mark_busy(&rbe->ext)) continue; + if (rbe_prev) { + rb_erase(&rbe_prev->node, &priv->root); + rbe_prev = NULL; + } gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); if (!gcb) break; atomic_dec(&set->nelems); nft_set_gc_batch_add(gcb, rbe); + rbe_prev = rbe; - if (prev) { - rbe = rb_entry(prev, struct nft_rbtree_elem, node); + if (rbe_end) { atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, rbe); - prev = NULL; + nft_set_gc_batch_add(gcb, rbe_end); + rb_erase(&rbe_end->node, &priv->root); + rbe_end = NULL; } node = rb_next(node); if (!node) break; } - if (gcb) { - for (i = 0; i < gcb->head.cnt; i++) { - rbe = gcb->elems[i]; - rb_erase(&rbe->node, &priv->root); - } - } + if (rbe_prev) + rb_erase(&rbe_prev->node, &priv->root); write_seqcount_end(&priv->count); write_unlock_bh(&priv->lock); -- cgit v1.2.3 From 421c119f558761556afca6a62ad183bc2d8659e0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 24 Sep 2018 14:10:04 +0200 Subject: netfilter: avoid erronous array bounds warning Unfortunately some versions of gcc emit following warning: $ make net/xfrm/xfrm_output.o linux/compiler.h:252:20: warning: array subscript is above array bounds [-Warray-bounds] hook_head = rcu_dereference(net->nf.hooks_arp[hook]); ^~~~~~~~~~~~~~~~~~~~~ xfrm_output_resume passes skb_dst(skb)->ops->family as its 'pf' arg so compiler can't know that we'll never access hooks_arp[]. (NFPROTO_IPV4 or NFPROTO_IPV6 are only possible cases). Avoid this by adding an explicit WARN_ON_ONCE() check. This patch has no effect if the family is a compile-time constant as gcc will remove the switch() construct entirely. Reported-by: David Ahern Signed-off-by: Florian Westphal Reviewed-by: David Ahern Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 07efffd0c759..bbe99d2b28b4 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -215,6 +215,8 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, break; case NFPROTO_ARP: #ifdef CONFIG_NETFILTER_FAMILY_ARP + if (WARN_ON_ONCE(hook >= ARRAY_SIZE(net->nf.hooks_arp))) + break; hook_head = rcu_dereference(net->nf.hooks_arp[hook]); #endif break; -- cgit v1.2.3 From 40e4f26e6a14fc1496eabb8b0004a547303114e6 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Thu, 27 Sep 2018 19:36:28 -0300 Subject: netfilter: xt_socket: check sk before checking for netns. Only check for the network namespace if the socket is available. Fixes: f564650106a6 ("netfilter: check if the socket netns is correct.") Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Flavio Leitner Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 0472f3472842..ada144e5645b 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -56,7 +56,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; - if (!net_eq(xt_net(par), sock_net(sk))) + if (sk && !net_eq(xt_net(par), sock_net(sk))) sk = NULL; if (!sk) @@ -117,7 +117,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; - if (!net_eq(xt_net(par), sock_net(sk))) + if (sk && !net_eq(xt_net(par), sock_net(sk))) sk = NULL; if (!sk) -- cgit v1.2.3 From 065a2cdcbdf8eb9aefb66e1a24b2d684b8b8852b Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Wed, 26 Sep 2018 18:07:09 +0200 Subject: s390: qeth_core_mpc: Use ARRAY_SIZE instead of reimplementing its function Use the common code ARRAY_SIZE macro instead of a private implementation. Reviewed-by: Jean Delvare Signed-off-by: zhong jiang Signed-off-by: Martin Schwidefsky Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_mpc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/s390/net/qeth_core_mpc.c b/drivers/s390/net/qeth_core_mpc.c index 5bcb8dafc3ee..e8263ded0af0 100644 --- a/drivers/s390/net/qeth_core_mpc.c +++ b/drivers/s390/net/qeth_core_mpc.c @@ -222,8 +222,7 @@ static struct ipa_rc_msg qeth_ipa_rc_msg[] = { char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc) { int x = 0; - qeth_ipa_rc_msg[sizeof(qeth_ipa_rc_msg) / - sizeof(struct ipa_rc_msg) - 1].rc = rc; + qeth_ipa_rc_msg[ARRAY_SIZE(qeth_ipa_rc_msg) - 1].rc = rc; while (qeth_ipa_rc_msg[x].rc != rc) x++; return qeth_ipa_rc_msg[x].msg; @@ -270,9 +269,7 @@ static struct ipa_cmd_names qeth_ipa_cmd_names[] = { char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd) { int x = 0; - qeth_ipa_cmd_names[ - sizeof(qeth_ipa_cmd_names) / - sizeof(struct ipa_cmd_names)-1].cmd = cmd; + qeth_ipa_cmd_names[ARRAY_SIZE(qeth_ipa_cmd_names) - 1].cmd = cmd; while (qeth_ipa_cmd_names[x].cmd != cmd) x++; return qeth_ipa_cmd_names[x].name; -- cgit v1.2.3 From 048a7f8b4ec085d5c56ad4a3bf450389a4aed5f9 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 26 Sep 2018 18:07:10 +0200 Subject: s390: qeth: Fix potential array overrun in cmd/rc lookup Functions qeth_get_ipa_msg and qeth_get_ipa_cmd_name are modifying the last member of global arrays without any locking that I can see. If two instances of either function are running at the same time, it could cause a race ultimately leading to an array overrun (the contents of the last entry of the array is the only guarantee that the loop will ever stop). Performing the lookups without modifying the arrays is admittedly slower (two comparisons per iteration instead of one) but these are operations which are rare (should only be needed in error cases or when debugging, not during successful operation) and it seems still less costly than introducing a mutex to protect the arrays in question. As a side bonus, it allows us to declare both arrays as const data. Signed-off-by: Jean Delvare Cc: Julian Wiedmann Cc: Ursula Braun Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 2 +- drivers/s390/net/qeth_core_mpc.c | 30 ++++++++++++++++-------------- drivers/s390/net/qeth_core_mpc.h | 4 ++-- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index de8282420f96..ffce6f39828a 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -610,7 +610,7 @@ static void qeth_put_reply(struct qeth_reply *reply) static void qeth_issue_ipa_msg(struct qeth_ipa_cmd *cmd, int rc, struct qeth_card *card) { - char *ipa_name; + const char *ipa_name; int com = cmd->hdr.command; ipa_name = qeth_get_ipa_cmd_name(com); if (rc) diff --git a/drivers/s390/net/qeth_core_mpc.c b/drivers/s390/net/qeth_core_mpc.c index e8263ded0af0..e891c0b52f4c 100644 --- a/drivers/s390/net/qeth_core_mpc.c +++ b/drivers/s390/net/qeth_core_mpc.c @@ -148,10 +148,10 @@ EXPORT_SYMBOL_GPL(IPA_PDU_HEADER); struct ipa_rc_msg { enum qeth_ipa_return_codes rc; - char *msg; + const char *msg; }; -static struct ipa_rc_msg qeth_ipa_rc_msg[] = { +static const struct ipa_rc_msg qeth_ipa_rc_msg[] = { {IPA_RC_SUCCESS, "success"}, {IPA_RC_NOTSUPP, "Command not supported"}, {IPA_RC_IP_TABLE_FULL, "Add Addr IP Table Full - ipv6"}, @@ -219,22 +219,23 @@ static struct ipa_rc_msg qeth_ipa_rc_msg[] = { -char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc) +const char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc) { - int x = 0; - qeth_ipa_rc_msg[ARRAY_SIZE(qeth_ipa_rc_msg) - 1].rc = rc; - while (qeth_ipa_rc_msg[x].rc != rc) - x++; + int x; + + for (x = 0; x < ARRAY_SIZE(qeth_ipa_rc_msg) - 1; x++) + if (qeth_ipa_rc_msg[x].rc == rc) + return qeth_ipa_rc_msg[x].msg; return qeth_ipa_rc_msg[x].msg; } struct ipa_cmd_names { enum qeth_ipa_cmds cmd; - char *name; + const char *name; }; -static struct ipa_cmd_names qeth_ipa_cmd_names[] = { +static const struct ipa_cmd_names qeth_ipa_cmd_names[] = { {IPA_CMD_STARTLAN, "startlan"}, {IPA_CMD_STOPLAN, "stoplan"}, {IPA_CMD_SETVMAC, "setvmac"}, @@ -266,11 +267,12 @@ static struct ipa_cmd_names qeth_ipa_cmd_names[] = { {IPA_CMD_UNKNOWN, "unknown"}, }; -char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd) +const char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd) { - int x = 0; - qeth_ipa_cmd_names[ARRAY_SIZE(qeth_ipa_cmd_names) - 1].cmd = cmd; - while (qeth_ipa_cmd_names[x].cmd != cmd) - x++; + int x; + + for (x = 0; x < ARRAY_SIZE(qeth_ipa_cmd_names) - 1; x++) + if (qeth_ipa_cmd_names[x].cmd == cmd) + return qeth_ipa_cmd_names[x].name; return qeth_ipa_cmd_names[x].name; } diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index aa8b9196b089..aa5de1fe01e1 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -797,8 +797,8 @@ enum qeth_ipa_arp_return_codes { QETH_IPA_ARP_RC_Q_NO_DATA = 0x0008, }; -extern char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc); -extern char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd); +extern const char *qeth_get_ipa_msg(enum qeth_ipa_return_codes rc); +extern const char *qeth_get_ipa_cmd_name(enum qeth_ipa_cmds cmd); #define QETH_SETASS_BASE_LEN (sizeof(struct qeth_ipacmd_hdr) + \ sizeof(struct qeth_ipacmd_setassparms_hdr)) -- cgit v1.2.3 From ce7d17d6c607aa0d898f4712e75e27d319816b3b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 27 Sep 2018 10:47:01 +0200 Subject: MAINTAINERS: change bridge maintainers I haven't been doing reviews only but not active development on bridge code for several years. Roopa and Nikolay have been doing most of the new features and have agreed to take over as new co-maintainers. Signed-off-by: Stephen Hemminger Acked-by: Roopa Prabhu Acked-by: Nikolay Aleksandrov --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 02a39617ec82..b0ca9e214ef0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5470,7 +5470,8 @@ S: Odd Fixes F: drivers/net/ethernet/agere/ ETHERNET BRIDGE -M: Stephen Hemminger +M: Roopa Prabhu +M: Nikolay Aleksandrov L: bridge@lists.linux-foundation.org (moderated for non-subscribers) L: netdev@vger.kernel.org W: http://www.linuxfoundation.org/en/Net:Bridge -- cgit v1.2.3 From cb973127a793c5ade8102aa4ab7bb5e4b1e64190 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Wed, 26 Sep 2018 21:57:03 -0700 Subject: Update maintainers for bnx2/bnx2x/qlge/qlcnic drivers. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ameen Rahman Signed-off-by: David S. Miller --- MAINTAINERS | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index b0ca9e214ef0..dcb0191c4f54 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2956,7 +2956,6 @@ F: include/linux/bcm963xx_tag.h BROADCOM BNX2 GIGABIT ETHERNET DRIVER M: Rasesh Mody -M: Harish Patil M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org S: Supported @@ -2977,6 +2976,7 @@ F: drivers/scsi/bnx2i/ BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER M: Ariel Elior +M: Sudarsana Kalluru M: everest-linux-l2@cavium.com L: netdev@vger.kernel.org S: Supported @@ -11974,7 +11974,7 @@ F: Documentation/scsi/LICENSE.qla4xxx F: drivers/scsi/qla4xxx/ QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER -M: Harish Patil +M: Shahed Shaikh M: Manish Chopra M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org @@ -11982,7 +11982,6 @@ S: Supported F: drivers/net/ethernet/qlogic/qlcnic/ QLOGIC QLGE 10Gb ETHERNET DRIVER -M: Harish Patil M: Manish Chopra M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org -- cgit v1.2.3 From 5f672090e44f4951084c5e1d6b0668a5fc422af8 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Thu, 27 Sep 2018 04:12:10 -0700 Subject: qed: Fix shmem structure inconsistency between driver and the mfw. The structure shared between driver and the management FW (mfw) differ in sizes. This would lead to issues when driver try to access the structure members which are not-aligned with the mfw copy e.g., data_ptr usage in the case of mfw_tlv request. Align the driver structure with mfw copy, add reserved field(s) to driver structure for the members not used by the driver. Fixes: dd006921d67f ("qed: Add MFW interfaces for TLV request support.) Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Michal Kalderon --- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 9b3ef00e5782..a71382687ef2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -11987,6 +11987,7 @@ struct public_global { u32 running_bundle_id; s32 external_temperature; u32 mdump_reason; + u64 reserved; u32 data_ptr; u32 data_size; }; -- cgit v1.2.3 From c24498c6827b71f80fecc9fb1b70a792053d41a9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:51 -0700 Subject: netpoll: do not test NAPI_STATE_SCHED in poll_one_napi() Since we do no longer require NAPI drivers to provide an ndo_poll_controller(), napi_schedule() has not been done before poll_one_napi() invocation. So testing NAPI_STATE_SCHED is likely to cause early returns. While we are at it, remove outdated comment. Note to future bisections : This change might surface prior bugs in drivers. See commit 73f21c653f93 ("bnxt_en: Fix TX timeout during netpoll.") for one occurrence. Fixes: ac3d9dd034e5 ("netpoll: make ndo_poll_controller() optional") Signed-off-by: Eric Dumazet Tested-by: Song Liu Cc: Michael Chan Signed-off-by: David S. Miller --- net/core/netpoll.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 3219a2932463..3ae899805f8b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -135,27 +135,9 @@ static void queue_process(struct work_struct *work) } } -/* - * Check whether delayed processing was scheduled for our NIC. If so, - * we attempt to grab the poll lock and use ->poll() to pump the card. - * If this fails, either we've recursed in ->poll() or it's already - * running on another CPU. - * - * Note: we don't mask interrupts with this lock because we're using - * trylock here and interrupts are already disabled in the softirq - * case. Further, we test the poll_owner to avoid recursion on UP - * systems where the lock doesn't exist. - */ static void poll_one_napi(struct napi_struct *napi) { - int work = 0; - - /* net_rx_action's ->poll() invocations and our's are - * synchronized by this test which is only made while - * holding the napi->poll_lock. - */ - if (!test_bit(NAPI_STATE_SCHED, &napi->state)) - return; + int work; /* If we set this bit but see that it has already been set, * that indicates that napi has been disabled and we need -- cgit v1.2.3 From e71fb423e0dea3c9f98f0101e965426edfe849cd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:52 -0700 Subject: hinic: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. hinic uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Note that hinic_netpoll() was incorrectly scheduling NAPI on both RX and TX queues. Signed-off-by: Eric Dumazet Cc: Aviad Krawczyk Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_main.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 09e9da10b786..4a8f82938ed5 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -789,23 +789,6 @@ static void hinic_get_stats64(struct net_device *netdev, stats->tx_errors = nic_tx_stats->tx_dropped; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void hinic_netpoll(struct net_device *netdev) -{ - struct hinic_dev *nic_dev = netdev_priv(netdev); - int i, num_qps; - - num_qps = hinic_hwdev_num_qps(nic_dev->hwdev); - for (i = 0; i < num_qps; i++) { - struct hinic_txq *txq = &nic_dev->txqs[i]; - struct hinic_rxq *rxq = &nic_dev->rxqs[i]; - - napi_schedule(&txq->napi); - napi_schedule(&rxq->napi); - } -} -#endif - static const struct net_device_ops hinic_netdev_ops = { .ndo_open = hinic_open, .ndo_stop = hinic_close, @@ -818,9 +801,6 @@ static const struct net_device_ops hinic_netdev_ops = { .ndo_start_xmit = hinic_xmit_frame, .ndo_tx_timeout = hinic_tx_timeout, .ndo_get_stats64 = hinic_get_stats64, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = hinic_netpoll, -#endif }; static void netdev_features_init(struct net_device *netdev) -- cgit v1.2.3 From 226a2dd62c5d789088fcf7804fbe5613887870a5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:53 -0700 Subject: ehea: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. ehea uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Douglas Miller Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ehea/ehea_main.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index ba580bfae512..03f64f40b2a3 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -921,17 +921,6 @@ static int ehea_poll(struct napi_struct *napi, int budget) return rx; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void ehea_netpoll(struct net_device *dev) -{ - struct ehea_port *port = netdev_priv(dev); - int i; - - for (i = 0; i < port->num_def_qps; i++) - napi_schedule(&port->port_res[i].napi); -} -#endif - static irqreturn_t ehea_recv_irq_handler(int irq, void *param) { struct ehea_port_res *pr = param; @@ -2953,9 +2942,6 @@ static const struct net_device_ops ehea_netdev_ops = { .ndo_open = ehea_open, .ndo_stop = ehea_stop, .ndo_start_xmit = ehea_start_xmit, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ehea_netpoll, -#endif .ndo_get_stats64 = ehea_get_stats64, .ndo_set_mac_address = ehea_set_mac_addr, .ndo_validate_addr = eth_validate_addr, -- cgit v1.2.3 From 4bd2c03be707253f1157bd759fdd6971e4f70403 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:54 -0700 Subject: net: hns: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. hns uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Yisen Zhuang Cc: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 5ce23d4b717e..28e907831b0e 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1503,21 +1503,6 @@ static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, return phy_mii_ioctl(phy_dev, ifr, cmd); } -/* use only for netconsole to poll with the device without interrupt */ -#ifdef CONFIG_NET_POLL_CONTROLLER -static void hns_nic_poll_controller(struct net_device *ndev) -{ - struct hns_nic_priv *priv = netdev_priv(ndev); - unsigned long flags; - int i; - - local_irq_save(flags); - for (i = 0; i < priv->ae_handle->q_num * 2; i++) - napi_schedule(&priv->ring_data[i].napi); - local_irq_restore(flags); -} -#endif - static netdev_tx_t hns_nic_net_xmit(struct sk_buff *skb, struct net_device *ndev) { @@ -1970,9 +1955,6 @@ static const struct net_device_ops hns_nic_netdev_ops = { .ndo_set_features = hns_nic_set_features, .ndo_fix_features = hns_nic_fix_features, .ndo_get_stats64 = hns_nic_get_stats64, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = hns_nic_poll_controller, -#endif .ndo_set_rx_mode = hns_nic_set_rx_mode, .ndo_select_queue = hns_nic_select_queue, }; -- cgit v1.2.3 From 260dd2c3e2aeefbe78065f0737dceae1ceb1196a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:55 -0700 Subject: virtio_net: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. virto_net uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: "Michael S. Tsirkin" Cc: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 765920905226..dab504ec5e50 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1699,17 +1699,6 @@ static void virtnet_stats(struct net_device *dev, tot->rx_frame_errors = dev->stats.rx_frame_errors; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void virtnet_netpoll(struct net_device *dev) -{ - struct virtnet_info *vi = netdev_priv(dev); - int i; - - for (i = 0; i < vi->curr_queue_pairs; i++) - napi_schedule(&vi->rq[i].napi); -} -#endif - static void virtnet_ack_link_announce(struct virtnet_info *vi) { rtnl_lock(); @@ -2447,9 +2436,6 @@ static const struct net_device_ops virtnet_netdev = { .ndo_get_stats64 = virtnet_stats, .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = virtnet_netpoll, -#endif .ndo_bpf = virtnet_xdp, .ndo_xdp_xmit = virtnet_xdp_xmit, .ndo_features_check = passthru_features_check, -- cgit v1.2.3 From 81b059b2187d77b957bf85318dbd4f36d60555e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:56 -0700 Subject: qlcnic: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. qlcnic uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Harish Patil Cc: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 45 ------------------------ 1 file changed, 45 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 2d38d1ac2aae..dbd48012224f 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -59,9 +59,6 @@ static int qlcnic_close(struct net_device *netdev); static void qlcnic_tx_timeout(struct net_device *netdev); static void qlcnic_attach_work(struct work_struct *work); static void qlcnic_fwinit_work(struct work_struct *work); -#ifdef CONFIG_NET_POLL_CONTROLLER -static void qlcnic_poll_controller(struct net_device *netdev); -#endif static void qlcnic_idc_debug_info(struct qlcnic_adapter *adapter, u8 encoding); static int qlcnic_can_start_firmware(struct qlcnic_adapter *adapter); @@ -545,9 +542,6 @@ static const struct net_device_ops qlcnic_netdev_ops = { .ndo_udp_tunnel_add = qlcnic_add_vxlan_port, .ndo_udp_tunnel_del = qlcnic_del_vxlan_port, .ndo_features_check = qlcnic_features_check, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = qlcnic_poll_controller, -#endif #ifdef CONFIG_QLCNIC_SRIOV .ndo_set_vf_mac = qlcnic_sriov_set_vf_mac, .ndo_set_vf_rate = qlcnic_sriov_set_vf_tx_rate, @@ -3200,45 +3194,6 @@ static irqreturn_t qlcnic_msix_tx_intr(int irq, void *data) return IRQ_HANDLED; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void qlcnic_poll_controller(struct net_device *netdev) -{ - struct qlcnic_adapter *adapter = netdev_priv(netdev); - struct qlcnic_host_sds_ring *sds_ring; - struct qlcnic_recv_context *recv_ctx; - struct qlcnic_host_tx_ring *tx_ring; - int ring; - - if (!test_bit(__QLCNIC_DEV_UP, &adapter->state)) - return; - - recv_ctx = adapter->recv_ctx; - - for (ring = 0; ring < adapter->drv_sds_rings; ring++) { - sds_ring = &recv_ctx->sds_rings[ring]; - qlcnic_disable_sds_intr(adapter, sds_ring); - napi_schedule(&sds_ring->napi); - } - - if (adapter->flags & QLCNIC_MSIX_ENABLED) { - /* Only Multi-Tx queue capable devices need to - * schedule NAPI for TX rings - */ - if ((qlcnic_83xx_check(adapter) && - (adapter->flags & QLCNIC_TX_INTR_SHARED)) || - (qlcnic_82xx_check(adapter) && - !qlcnic_check_multi_tx(adapter))) - return; - - for (ring = 0; ring < adapter->drv_tx_rings; ring++) { - tx_ring = &adapter->tx_ring[ring]; - qlcnic_disable_tx_intr(adapter, tx_ring); - napi_schedule(&tx_ring->napi); - } - } -} -#endif - static void qlcnic_idc_debug_info(struct qlcnic_adapter *adapter, u8 encoding) { -- cgit v1.2.3 From 3548fcf7d877c682c9a5a413c51929739192156d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:57 -0700 Subject: qlogic: netxen: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. netxen uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Manish Chopra Cc: Rahul Verma Signed-off-by: David S. Miller --- .../net/ethernet/qlogic/netxen/netxen_nic_main.c | 23 ---------------------- 1 file changed, 23 deletions(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 69aa7fc392c5..59c70be22a84 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -72,9 +72,6 @@ static void netxen_schedule_work(struct netxen_adapter *adapter, work_func_t func, int delay); static void netxen_cancel_fw_work(struct netxen_adapter *adapter); static int netxen_nic_poll(struct napi_struct *napi, int budget); -#ifdef CONFIG_NET_POLL_CONTROLLER -static void netxen_nic_poll_controller(struct net_device *netdev); -#endif static void netxen_create_sysfs_entries(struct netxen_adapter *adapter); static void netxen_remove_sysfs_entries(struct netxen_adapter *adapter); @@ -581,9 +578,6 @@ static const struct net_device_ops netxen_netdev_ops = { .ndo_tx_timeout = netxen_tx_timeout, .ndo_fix_features = netxen_fix_features, .ndo_set_features = netxen_set_features, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = netxen_nic_poll_controller, -#endif }; static inline bool netxen_function_zero(struct pci_dev *pdev) @@ -2402,23 +2396,6 @@ static int netxen_nic_poll(struct napi_struct *napi, int budget) return work_done; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void netxen_nic_poll_controller(struct net_device *netdev) -{ - int ring; - struct nx_host_sds_ring *sds_ring; - struct netxen_adapter *adapter = netdev_priv(netdev); - struct netxen_recv_context *recv_ctx = &adapter->recv_ctx; - - disable_irq(adapter->irq); - for (ring = 0; ring < adapter->max_sds_rings; ring++) { - sds_ring = &recv_ctx->sds_rings[ring]; - netxen_intr(adapter->irq, sds_ring); - } - enable_irq(adapter->irq); -} -#endif - static int nx_incr_dev_ref_cnt(struct netxen_adapter *adapter) { -- cgit v1.2.3 From 21627982e4fff76a053f4d08d7fb56e532e08d52 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:58 -0700 Subject: net: ena: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. ena uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Netanel Belgazal Cc: Saeed Bishara Cc: Zorik Machulsky Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 29b5774dd32d..25621a218f20 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2185,25 +2185,6 @@ error_drop_packet: return NETDEV_TX_OK; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void ena_netpoll(struct net_device *netdev) -{ - struct ena_adapter *adapter = netdev_priv(netdev); - int i; - - /* Dont schedule NAPI if the driver is in the middle of reset - * or netdev is down. - */ - - if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags) || - test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) - return; - - for (i = 0; i < adapter->num_queues; i++) - napi_schedule(&adapter->ena_napi[i].napi); -} -#endif /* CONFIG_NET_POLL_CONTROLLER */ - static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev, select_queue_fallback_t fallback) @@ -2369,9 +2350,6 @@ static const struct net_device_ops ena_netdev_ops = { .ndo_change_mtu = ena_change_mtu, .ndo_set_mac_address = NULL, .ndo_validate_addr = eth_validate_addr, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ena_netpoll, -#endif /* CONFIG_NET_POLL_CONTROLLER */ }; static int ena_device_validate_params(struct ena_adapter *adapter, -- cgit v1.2.3 From 9447a10ff607debe5e30cc438fb56925a559b9d9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:31:59 -0700 Subject: sfc: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. sfc uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Edward Cree Cc: Bert Kenward Cc: Solarflare linux maintainers Acked-By: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 330233286e78..3d0dd39c289e 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -2206,29 +2206,6 @@ static void efx_fini_napi(struct efx_nic *efx) efx_fini_napi_channel(channel); } -/************************************************************************** - * - * Kernel netpoll interface - * - *************************************************************************/ - -#ifdef CONFIG_NET_POLL_CONTROLLER - -/* Although in the common case interrupts will be disabled, this is not - * guaranteed. However, all our work happens inside the NAPI callback, - * so no locking is required. - */ -static void efx_netpoll(struct net_device *net_dev) -{ - struct efx_nic *efx = netdev_priv(net_dev); - struct efx_channel *channel; - - efx_for_each_channel(channel, efx) - efx_schedule_channel(channel); -} - -#endif - /************************************************************************** * * Kernel net device interface @@ -2509,9 +2486,6 @@ static const struct net_device_ops efx_netdev_ops = { #endif .ndo_get_phys_port_id = efx_get_phys_port_id, .ndo_get_phys_port_name = efx_get_phys_port_name, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = efx_netpoll, -#endif .ndo_setup_tc = efx_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = efx_filter_rfs, -- cgit v1.2.3 From a4f570be654de779eaf626a79c4e0aa5a790505f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:32:00 -0700 Subject: sfc-falcon: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. sfc-falcon uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. Signed-off-by: Eric Dumazet Cc: Solarflare linux maintainers Cc: Edward Cree Cc: Bert Kenward Acked-By: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/falcon/efx.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index dd5530a4f8c8..03e2455c502e 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -2052,29 +2052,6 @@ static void ef4_fini_napi(struct ef4_nic *efx) ef4_fini_napi_channel(channel); } -/************************************************************************** - * - * Kernel netpoll interface - * - *************************************************************************/ - -#ifdef CONFIG_NET_POLL_CONTROLLER - -/* Although in the common case interrupts will be disabled, this is not - * guaranteed. However, all our work happens inside the NAPI callback, - * so no locking is required. - */ -static void ef4_netpoll(struct net_device *net_dev) -{ - struct ef4_nic *efx = netdev_priv(net_dev); - struct ef4_channel *channel; - - ef4_for_each_channel(channel, efx) - ef4_schedule_channel(channel); -} - -#endif - /************************************************************************** * * Kernel net device interface @@ -2250,9 +2227,6 @@ static const struct net_device_ops ef4_netdev_ops = { .ndo_set_mac_address = ef4_set_mac_address, .ndo_set_rx_mode = ef4_set_rx_mode, .ndo_set_features = ef4_set_features, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ef4_netpoll, -#endif .ndo_setup_tc = ef4_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = ef4_filter_rfs, -- cgit v1.2.3 From 0c3b9d1b37df16ae6046a5a01f769bf3d21b838c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2018 09:32:01 -0700 Subject: ibmvnic: remove ndo_poll_controller As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. ibmvnic uses NAPI for TX completions, so we better let core networking stack call the napi->poll() to avoid the capture. ibmvnic_netpoll_controller() was completely wrong anyway, as it was scheduling NAPI to service RX queues (instead of TX), so I doubt netpoll ever worked on this driver. Signed-off-by: Eric Dumazet Cc: Thomas Falcon Cc: John Allen Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 4f0daf67b18d..699ef942b615 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2207,19 +2207,6 @@ restart_poll: return frames_processed; } -#ifdef CONFIG_NET_POLL_CONTROLLER -static void ibmvnic_netpoll_controller(struct net_device *dev) -{ - struct ibmvnic_adapter *adapter = netdev_priv(dev); - int i; - - replenish_pools(netdev_priv(dev)); - for (i = 0; i < adapter->req_rx_queues; i++) - ibmvnic_interrupt_rx(adapter->rx_scrq[i]->irq, - adapter->rx_scrq[i]); -} -#endif - static int wait_for_reset(struct ibmvnic_adapter *adapter) { int rc, ret; @@ -2292,9 +2279,6 @@ static const struct net_device_ops ibmvnic_netdev_ops = { .ndo_set_mac_address = ibmvnic_set_mac, .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = ibmvnic_tx_timeout, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ibmvnic_netpoll_controller, -#endif .ndo_change_mtu = ibmvnic_change_mtu, .ndo_features_check = ibmvnic_features_check, }; -- cgit v1.2.3 From c4ce446e33d7a0e978256ac6fea4c80e59d9de5f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:50 -0700 Subject: asix: Check for supported Wake-on-LAN modes The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: 2e55cc7210fe ("[PATCH] USB: usbnet (3/9) module for ASIX Ethernet adapters") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/usb/asix_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index e95dd12edec4..023b8d0bf175 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -607,6 +607,9 @@ int asix_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= AX_MONITOR_LINK; if (wolinfo->wolopts & WAKE_MAGIC) -- cgit v1.2.3 From 5ba6b4aa9a410c5e2c6417df52b5e2118ea9b467 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:51 -0700 Subject: ax88179_178a: Check for supported Wake-on-LAN modes The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: e2ca90c276e1 ("ax88179_178a: ASIX AX88179_178A USB 3.0/2.0 to gigabit ethernet adapter driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/usb/ax88179_178a.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 9e8ad372f419..2207f7a7d1ff 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -566,6 +566,9 @@ ax88179_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= AX_MONITOR_MODE_RWLC; if (wolinfo->wolopts & WAKE_MAGIC) -- cgit v1.2.3 From eb9ad088f96653a26b340f7c447c44cf023d5cdc Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:52 -0700 Subject: lan78xx: Check for supported Wake-on-LAN modes The driver supports a fair amount of Wake-on-LAN modes, but is not checking that the user specified one that is supported. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Florian Fainelli Reviewed-by: Woojung Huh Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index a9991c5f4736..c3c9ba44e2a1 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1401,19 +1401,10 @@ static int lan78xx_set_wol(struct net_device *netdev, if (ret < 0) return ret; - pdata->wol = 0; - if (wol->wolopts & WAKE_UCAST) - pdata->wol |= WAKE_UCAST; - if (wol->wolopts & WAKE_MCAST) - pdata->wol |= WAKE_MCAST; - if (wol->wolopts & WAKE_BCAST) - pdata->wol |= WAKE_BCAST; - if (wol->wolopts & WAKE_MAGIC) - pdata->wol |= WAKE_MAGIC; - if (wol->wolopts & WAKE_PHY) - pdata->wol |= WAKE_PHY; - if (wol->wolopts & WAKE_ARP) - pdata->wol |= WAKE_ARP; + if (wol->wolopts & ~WAKE_ALL) + return -EINVAL; + + pdata->wol = wol->wolopts; device_set_wakeup_enable(&dev->udev->dev, (bool)wol->wolopts); -- cgit v1.2.3 From c5cb93e994ffb43b7b3b1ff10b9f928f54574a36 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:53 -0700 Subject: sr9800: Check for supported Wake-on-LAN modes The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: 19a38d8e0aa3 ("USB2NET : SR9800 : One chip USB2.0 USB2NET SR9800 Device Driver Support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/usb/sr9800.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index 9277a0f228df..35f39f23d881 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -421,6 +421,9 @@ sr_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= SR_MONITOR_LINK; if (wolinfo->wolopts & WAKE_MAGIC) -- cgit v1.2.3 From f2750df1548bd8a2b060eb609fc43ca82811af4c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:54 -0700 Subject: r8152: Check for supported Wake-on-LAN Modes The driver does not check for Wake-on-LAN modes specified by an user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. Fixes: 21ff2e8976b1 ("r8152: support WOL") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 2cd71bdb6484..f1b5201cc320 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -4506,6 +4506,9 @@ static int rtl8152_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) if (!rtl_can_wakeup(tp)) return -EOPNOTSUPP; + if (wol->wolopts & ~WAKE_ANY) + return -EINVAL; + ret = usb_autopm_get_interface(tp->intf); if (ret < 0) goto out_set_wol; -- cgit v1.2.3 From 9c734b2769a73eea2e9e9767c0e0bf839ff23679 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:55 -0700 Subject: smsc75xx: Check for Wake-on-LAN modes The driver does not check for Wake-on-LAN modes specified by an user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. Fixes: 6c636503260d ("smsc75xx: add wol magic packet support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/usb/smsc75xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 05553d252446..e5a4cbb366dc 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -731,6 +731,9 @@ static int smsc75xx_ethtool_set_wol(struct net_device *net, struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); int ret; + if (wolinfo->wolopts & ~SUPPORTED_WAKE) + return -EINVAL; + pdata->wolopts = wolinfo->wolopts & SUPPORTED_WAKE; ret = device_set_wakeup_enable(&dev->udev->dev, pdata->wolopts); -- cgit v1.2.3 From c530c471ba37bdd9fe1c7185b01455c00ae606fb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:56 -0700 Subject: smsc95xx: Check for Wake-on-LAN modes The driver does not check for Wake-on-LAN modes specified by an user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. Fixes: e0e474a83c18 ("smsc95xx: add wol magic packet support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/usb/smsc95xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 06b4d290784d..262e7a3c23cb 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -774,6 +774,9 @@ static int smsc95xx_ethtool_set_wol(struct net_device *net, struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); int ret; + if (wolinfo->wolopts & ~SUPPORTED_WAKE) + return -EINVAL; + pdata->wolopts = wolinfo->wolopts & SUPPORTED_WAKE; ret = device_set_wakeup_enable(&dev->udev->dev, pdata->wolopts); -- cgit v1.2.3 From c140eb166d681f66bd7e99fb121357db1a503e7f Mon Sep 17 00:00:00 2001 From: LUU Duc Canh Date: Wed, 26 Sep 2018 21:00:54 +0200 Subject: tipc: fix failover problem We see the following scenario: 1) Link endpoint B on node 1 discovers that its peer endpoint is gone. Since there is a second working link, failover procedure is started. 2) Link endpoint A on node 1 sends a FAILOVER message to peer endpoint A on node 2. The node item 1->2 goes to state FAILINGOVER. 3) Linke endpoint A/2 receives the failover, and is supposed to take down its parallell link endpoint B/2, while producing a FAILOVER message to send back to A/1. 4) However, B/2 has already been deleted, so no FAILOVER message can created. 5) Node 1->2 remains in state FAILINGOVER forever, refusing to receive any messages that can bring B/1 up again. We are left with a non- redundant link between node 1 and 2. We fix this with letting endpoint A/2 build a dummy FAILOVER message to send to back to A/1, so that the situation can be resolved. Signed-off-by: LUU Duc Canh Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 35 +++++++++++++++++++++++++++++++++++ net/tipc/link.h | 3 +++ net/tipc/node.c | 11 +++++++++++ 3 files changed, 49 insertions(+) diff --git a/net/tipc/link.c b/net/tipc/link.c index 26cc033ee167..4ed650ce6e61 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -410,6 +410,11 @@ char *tipc_link_name(struct tipc_link *l) return l->name; } +u32 tipc_link_state(struct tipc_link *l) +{ + return l->state; +} + /** * tipc_link_create - create a new link * @n: pointer to associated node @@ -1385,6 +1390,36 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, __skb_queue_tail(xmitq, skb); } +void tipc_link_create_dummy_tnl_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) +{ + u32 onode = tipc_own_addr(l->net); + struct tipc_msg *hdr, *ihdr; + struct sk_buff_head tnlq; + struct sk_buff *skb; + u32 dnode = l->addr; + + skb_queue_head_init(&tnlq); + skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG, + INT_H_SIZE, BASIC_H_SIZE, + dnode, onode, 0, 0, 0); + if (!skb) { + pr_warn("%sunable to create tunnel packet\n", link_co_err); + return; + } + + hdr = buf_msg(skb); + msg_set_msgcnt(hdr, 1); + msg_set_bearer_id(hdr, l->peer_bearer_id); + + ihdr = (struct tipc_msg *)msg_data(hdr); + tipc_msg_init(onode, ihdr, TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, + BASIC_H_SIZE, dnode); + msg_set_errcode(ihdr, TIPC_ERR_NO_PORT); + __skb_queue_tail(&tnlq, skb); + tipc_link_xmit(l, &tnlq, xmitq); +} + /* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets * with contents of the link's transmit and backlog queues. */ diff --git a/net/tipc/link.h b/net/tipc/link.h index 7bc494a33fdf..90488c538a4e 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -88,6 +88,8 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, struct tipc_link **link); void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); +void tipc_link_create_dummy_tnl_msg(struct tipc_link *tnl, + struct sk_buff_head *xmitq); void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_fsm_evt(struct tipc_link *l, int evt); bool tipc_link_is_up(struct tipc_link *l); @@ -107,6 +109,7 @@ u16 tipc_link_rcv_nxt(struct tipc_link *l); u16 tipc_link_acked(struct tipc_link *l); u32 tipc_link_id(struct tipc_link *l); char *tipc_link_name(struct tipc_link *l); +u32 tipc_link_state(struct tipc_link *l); char tipc_link_plane(struct tipc_link *l); int tipc_link_prio(struct tipc_link *l); int tipc_link_window(struct tipc_link *l); diff --git a/net/tipc/node.c b/net/tipc/node.c index 68014f1b6976..b0ee25f1f2e6 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -111,6 +111,7 @@ struct tipc_node { int action_flags; struct list_head list; int state; + bool failover_sent; u16 sync_point; int link_cnt; u16 working_links; @@ -680,6 +681,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, *slot0 = bearer_id; *slot1 = bearer_id; tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); + n->failover_sent = false; n->action_flags |= TIPC_NOTIFY_NODE_UP; tipc_link_set_active(nl, true); tipc_bcast_add_peer(n->net, nl, xmitq); @@ -1615,6 +1617,15 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, tipc_skb_queue_splice_tail_init(tipc_link_inputq(pl), tipc_link_inputq(l)); } + /* If parallel link was already down, and this happened before + * the tunnel link came up, FAILOVER was never sent. Ensure that + * FAILOVER is sent to get peer out of NODE_FAILINGOVER state. + */ + if (n->state != NODE_FAILINGOVER && !n->failover_sent) { + tipc_link_create_dummy_tnl_msg(l, xmitq); + n->failover_sent = true; + } + /* If pkts arrive out of order, use lowest calculated syncpt */ if (less(syncpt, n->sync_point)) n->sync_point = syncpt; -- cgit v1.2.3 From c333fa0c4f220f8f7ea5acd6b0ebf3bf13fd684d Mon Sep 17 00:00:00 2001 From: Shahed Shaikh Date: Wed, 26 Sep 2018 12:41:10 -0700 Subject: qlcnic: fix Tx descriptor corruption on 82xx devices In regular NIC transmission flow, driver always configures MAC using Tx queue zero descriptor as a part of MAC learning flow. But with multi Tx queue supported NIC, regular transmission can occur on any non-zero Tx queue and from that context it uses Tx queue zero descriptor to configure MAC, at the same time TX queue zero could be used by another CPU for regular transmission which could lead to Tx queue zero descriptor corruption and cause FW abort. This patch fixes this in such a way that driver always configures learned MAC address from the same Tx queue which is used for regular transmission. Fixes: 7e2cf4feba05 ("qlcnic: change driver hardware interface mechanism") Signed-off-by: Shahed Shaikh Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 8 +++++--- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 3 ++- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h | 3 ++- drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h | 3 ++- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 12 ++++++------ 5 files changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 81312924df14..0c443ea98479 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -1800,7 +1800,8 @@ struct qlcnic_hardware_ops { int (*config_loopback) (struct qlcnic_adapter *, u8); int (*clear_loopback) (struct qlcnic_adapter *, u8); int (*config_promisc_mode) (struct qlcnic_adapter *, u32); - void (*change_l2_filter) (struct qlcnic_adapter *, u64 *, u16); + void (*change_l2_filter)(struct qlcnic_adapter *adapter, u64 *addr, + u16 vlan, struct qlcnic_host_tx_ring *tx_ring); int (*get_board_info) (struct qlcnic_adapter *); void (*set_mac_filter_count) (struct qlcnic_adapter *); void (*free_mac_list) (struct qlcnic_adapter *); @@ -2064,9 +2065,10 @@ static inline int qlcnic_nic_set_promisc(struct qlcnic_adapter *adapter, } static inline void qlcnic_change_filter(struct qlcnic_adapter *adapter, - u64 *addr, u16 id) + u64 *addr, u16 vlan, + struct qlcnic_host_tx_ring *tx_ring) { - adapter->ahw->hw_ops->change_l2_filter(adapter, addr, id); + adapter->ahw->hw_ops->change_l2_filter(adapter, addr, vlan, tx_ring); } static inline int qlcnic_get_board_info(struct qlcnic_adapter *adapter) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index 569d54ededec..a79d84f99102 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -2135,7 +2135,8 @@ out: } void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr, - u16 vlan_id) + u16 vlan_id, + struct qlcnic_host_tx_ring *tx_ring) { u8 mac[ETH_ALEN]; memcpy(&mac, addr, ETH_ALEN); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h index b75a81246856..73fe2f64491d 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h @@ -550,7 +550,8 @@ int qlcnic_83xx_wrt_reg_indirect(struct qlcnic_adapter *, ulong, u32); int qlcnic_83xx_nic_set_promisc(struct qlcnic_adapter *, u32); int qlcnic_83xx_config_hw_lro(struct qlcnic_adapter *, int); int qlcnic_83xx_config_rss(struct qlcnic_adapter *, int); -void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *, u64 *, u16); +void qlcnic_83xx_change_l2_filter(struct qlcnic_adapter *adapter, u64 *addr, + u16 vlan, struct qlcnic_host_tx_ring *ring); int qlcnic_83xx_get_pci_info(struct qlcnic_adapter *, struct qlcnic_pci_info *); int qlcnic_83xx_set_nic_info(struct qlcnic_adapter *, struct qlcnic_info *); void qlcnic_83xx_initialize_nic(struct qlcnic_adapter *, int); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h index 4bb33af8e2b3..56a3bd9e37dc 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h @@ -173,7 +173,8 @@ int qlcnic_82xx_napi_add(struct qlcnic_adapter *adapter, struct net_device *netdev); void qlcnic_82xx_get_beacon_state(struct qlcnic_adapter *); void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, - u64 *uaddr, u16 vlan_id); + u64 *uaddr, u16 vlan_id, + struct qlcnic_host_tx_ring *tx_ring); int qlcnic_82xx_config_intr_coalesce(struct qlcnic_adapter *, struct ethtool_coalesce *); int qlcnic_82xx_set_rx_coalesce(struct qlcnic_adapter *); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 84dd83031a1b..9647578cbe6a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -268,13 +268,12 @@ static void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, } void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, u64 *uaddr, - u16 vlan_id) + u16 vlan_id, struct qlcnic_host_tx_ring *tx_ring) { struct cmd_desc_type0 *hwdesc; struct qlcnic_nic_req *req; struct qlcnic_mac_req *mac_req; struct qlcnic_vlan_req *vlan_req; - struct qlcnic_host_tx_ring *tx_ring = adapter->tx_ring; u32 producer; u64 word; @@ -301,7 +300,8 @@ void qlcnic_82xx_change_filter(struct qlcnic_adapter *adapter, u64 *uaddr, static void qlcnic_send_filter(struct qlcnic_adapter *adapter, struct cmd_desc_type0 *first_desc, - struct sk_buff *skb) + struct sk_buff *skb, + struct qlcnic_host_tx_ring *tx_ring) { struct vlan_ethhdr *vh = (struct vlan_ethhdr *)(skb->data); struct ethhdr *phdr = (struct ethhdr *)(skb->data); @@ -335,7 +335,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, tmp_fil->vlan_id == vlan_id) { if (jiffies > (QLCNIC_READD_AGE * HZ + tmp_fil->ftime)) qlcnic_change_filter(adapter, &src_addr, - vlan_id); + vlan_id, tx_ring); tmp_fil->ftime = jiffies; return; } @@ -350,7 +350,7 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, if (!fil) return; - qlcnic_change_filter(adapter, &src_addr, vlan_id); + qlcnic_change_filter(adapter, &src_addr, vlan_id, tx_ring); fil->ftime = jiffies; fil->vlan_id = vlan_id; memcpy(fil->faddr, &src_addr, ETH_ALEN); @@ -766,7 +766,7 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } if (adapter->drv_mac_learn) - qlcnic_send_filter(adapter, first_desc, skb); + qlcnic_send_filter(adapter, first_desc, skb, tx_ring); tx_ring->tx_stats.tx_bytes += skb->len; tx_ring->tx_stats.xmit_called++; -- cgit v1.2.3 From ac8bd9e13be22a3d24bfc80972d4688e3e50e457 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 26 Sep 2018 22:12:39 +0200 Subject: r8169: Disable clk during suspend / resume Disable the clk during suspend to save power. Note that tp->clk may be NULL, the clk core functions handle this without problems. Reviewed-by: Andy Shevchenko Tested-by: Carlo Caione Signed-off-by: Hans de Goede Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index ab30aaeac6d3..d6b53f53909a 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6869,8 +6869,10 @@ static int rtl8169_suspend(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); struct net_device *dev = pci_get_drvdata(pdev); + struct rtl8169_private *tp = netdev_priv(dev); rtl8169_net_suspend(dev); + clk_disable_unprepare(tp->clk); return 0; } @@ -6898,6 +6900,9 @@ static int rtl8169_resume(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); struct net_device *dev = pci_get_drvdata(pdev); + struct rtl8169_private *tp = netdev_priv(dev); + + clk_prepare_enable(tp->clk); if (netif_running(dev)) __rtl8169_resume(dev); -- cgit v1.2.3 From 43955a45dc0b4f3be7f0c3afc0e080ed59bb5280 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Sep 2018 22:19:42 +0200 Subject: netlink: fix typo in nla_parse_nested() comment Fix a simple typo: attribuets -> attributes Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index 0c154f98e987..39e1d875d507 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -153,7 +153,7 @@ * nla_find() find attribute in stream of attributes * nla_find_nested() find attribute in nested attributes * nla_parse() parse and validate stream of attrs - * nla_parse_nested() parse nested attribuets + * nla_parse_nested() parse nested attributes * nla_for_each_attr() loop over all attributes * nla_for_each_nested() loop over the nested attributes *========================================================================= -- cgit v1.2.3 From 4d8fcf216c90bc25e34ae2200aa8985ee3158898 Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Wed, 5 Sep 2018 11:43:23 +0300 Subject: net/mlx5e: Avoid unbounded peer devices when unpairing TC hairpin rules If the peer device was already unbound, then do not attempt to modify it's resources, otherwise we will crash on dereferencing non-existing device. Fixes: 5c65c564c962 ("net/mlx5e: Support offloading TC NIC hairpin flows") Signed-off-by: Alaa Hleihel Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en/fs.h | 2 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 62 +++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 5 +- include/linux/mlx5/transobj.h | 2 + 6 files changed, 71 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index db2cfcd21d43..0f189f873859 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -54,6 +54,7 @@ #include "en_stats.h" #include "en/fs.h" +extern const struct net_device_ops mlx5e_netdev_ops; struct page_pool; #define MLX5E_METADATA_ETHER_TYPE (0x8CE4) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index bbf69e859b78..1431232c9a09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -16,6 +16,8 @@ struct mlx5e_tc_table { DECLARE_HASHTABLE(mod_hdr_tbl, 8); DECLARE_HASHTABLE(hairpin_tbl, 8); + + struct notifier_block netdevice_nb; }; struct mlx5e_flow_table { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 54118b77dc1f..f291d1bf1558 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4315,7 +4315,7 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } -static const struct net_device_ops mlx5e_netdev_ops = { +const struct net_device_ops mlx5e_netdev_ops = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, .ndo_start_xmit = mlx5e_xmit, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9fed54017659..52e05f3ece50 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2946,14 +2946,71 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, return 0; } +static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, + struct mlx5e_priv *peer_priv) +{ + struct mlx5_core_dev *peer_mdev = peer_priv->mdev; + struct mlx5e_hairpin_entry *hpe; + u16 peer_vhca_id; + int bkt; + + if (!same_hw_devs(priv, peer_priv)) + return; + + peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id); + + hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) { + if (hpe->peer_vhca_id == peer_vhca_id) + hpe->hp->pair->peer_gone = true; + } +} + +static int mlx5e_tc_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct mlx5e_flow_steering *fs; + struct mlx5e_priv *peer_priv; + struct mlx5e_tc_table *tc; + struct mlx5e_priv *priv; + + if (ndev->netdev_ops != &mlx5e_netdev_ops || + event != NETDEV_UNREGISTER || + ndev->reg_state == NETREG_REGISTERED) + return NOTIFY_DONE; + + tc = container_of(this, struct mlx5e_tc_table, netdevice_nb); + fs = container_of(tc, struct mlx5e_flow_steering, tc); + priv = container_of(fs, struct mlx5e_priv, fs); + peer_priv = netdev_priv(ndev); + if (priv == peer_priv || + !(priv->netdev->features & NETIF_F_HW_TC)) + return NOTIFY_DONE; + + mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv); + + return NOTIFY_DONE; +} + int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { struct mlx5e_tc_table *tc = &priv->fs.tc; + int err; hash_init(tc->mod_hdr_tbl); hash_init(tc->hairpin_tbl); - return rhashtable_init(&tc->ht, &tc_ht_params); + err = rhashtable_init(&tc->ht, &tc_ht_params); + if (err) + return err; + + tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; + if (register_netdevice_notifier(&tc->netdevice_nb)) { + tc->netdevice_nb.notifier_call = NULL; + mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n"); + } + + return err; } static void _mlx5e_tc_del_flow(void *ptr, void *arg) @@ -2969,6 +3026,9 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) { struct mlx5e_tc_table *tc = &priv->fs.tc; + if (tc->netdevice_nb.notifier_call) + unregister_netdevice_notifier(&tc->netdevice_nb); + rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL); if (!IS_ERR_OR_NULL(tc->t)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index d2f76070ea7c..a1ee9a8a769e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -475,7 +475,8 @@ static void mlx5_hairpin_destroy_queues(struct mlx5_hairpin *hp) for (i = 0; i < hp->num_channels; i++) { mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[i]); - mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]); + if (!hp->peer_gone) + mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]); } } @@ -567,6 +568,8 @@ static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp) MLX5_RQC_STATE_RST, 0, 0); /* unset peer SQs */ + if (hp->peer_gone) + return; for (i = 0; i < hp->num_channels; i++) mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_RST, 0, 0); diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h index 83a33a1873a6..7f5ca2cd3a32 100644 --- a/include/linux/mlx5/transobj.h +++ b/include/linux/mlx5/transobj.h @@ -90,6 +90,8 @@ struct mlx5_hairpin { u32 *rqn; u32 *sqn; + + bool peer_gone; }; struct mlx5_hairpin * -- cgit v1.2.3 From 11aa5800ed66ed0415b7509f02881c76417d212a Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sun, 16 Sep 2018 14:45:27 +0300 Subject: net/mlx5: E-Switch, Fix out of bound access when setting vport rate The code that deals with eswitch vport bw guarantee was going beyond the eswitch vport array limit, fix that. This was pointed out by the kernel address sanitizer (KASAN). The error from KASAN log: [2018-09-15 15:04:45] BUG: KASAN: slab-out-of-bounds in mlx5_eswitch_set_vport_rate+0x8c1/0xae0 [mlx5_core] Fixes: c9497c98901c ("net/mlx5: Add support for setting VF min rate") Signed-off-by: Eran Ben Elisha Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2b252cde5cc2..ea7dedc2d5ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2000,7 +2000,7 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw) u32 max_guarantee = 0; int i; - for (i = 0; i <= esw->total_vports; i++) { + for (i = 0; i < esw->total_vports; i++) { evport = &esw->vports[i]; if (!evport->enabled || evport->info.min_rate < max_guarantee) continue; @@ -2020,7 +2020,7 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) int err; int i; - for (i = 0; i <= esw->total_vports; i++) { + for (i = 0; i < esw->total_vports; i++) { evport = &esw->vports[i]; if (!evport->enabled) continue; -- cgit v1.2.3 From cee26487620bc9bc3c7db21b6984d91f7bae12ae Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Sat, 25 Aug 2018 03:29:58 +0000 Subject: net/mlx5e: Set vlan masks for all offloaded TC rules In flow steering, if asked to, the hardware matches on the first ethertype which is not vlan. It's possible to set a rule as follows, which is meant to match on untagged packet, but will match on a vlan packet: tc filter add dev eth0 parent ffff: protocol ip flower ... To avoid this for packets with single tag, we set vlan masks to tell hardware to check the tags for every matched packet. Fixes: 095b6cfd69ce ('net/mlx5e: Add TC vlan match parsing') Signed-off-by: Jianbo Liu Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 52e05f3ece50..85796727093e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1368,6 +1368,9 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, *match_level = MLX5_MATCH_L2; } + } else { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); } if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) { -- cgit v1.2.3 From 1ad98e9d1bdf4724c0a8532fabd84bf3c457c2bc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 1 Oct 2018 15:02:26 -0700 Subject: tcp/dccp: fix lockdep issue when SYN is backlogged In normal SYN processing, packets are handled without listener lock and in RCU protected ingress path. But syzkaller is known to be able to trick us and SYN packets might be processed in process context, after being queued into socket backlog. In commit 06f877d613be ("tcp/dccp: fix other lockdep splats accessing ireq_opt") I made a very stupid fix, that happened to work mostly because of the regular path being RCU protected. Really the thing protecting ireq->ireq_opt is RCU read lock, and the pseudo request refcnt is not relevant. This patch extends what I did in commit 449809a66c1d ("tcp/dccp: block BH for SYN processing") by adding an extra rcu_read_{lock|unlock} pair in the paths that might be taken when processing SYN from socket backlog (thus possibly in process context) Fixes: 06f877d613be ("tcp/dccp: fix other lockdep splats accessing ireq_opt") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- include/net/inet_sock.h | 3 +-- net/dccp/input.c | 4 +++- net/ipv4/tcp_input.c | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index e03b93360f33..a8cd5cf9ff5b 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -132,8 +132,7 @@ static inline int inet_request_bound_dev_if(const struct sock *sk, static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) { - return rcu_dereference_check(ireq->ireq_opt, - refcount_read(&ireq->req.rsk_refcnt) > 0); + return rcu_dereference(ireq->ireq_opt); } struct inet_cork { diff --git a/net/dccp/input.c b/net/dccp/input.c index d28d46bff6ab..85d6c879383d 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -606,11 +606,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (sk->sk_state == DCCP_LISTEN) { if (dh->dccph_type == DCCP_PKT_REQUEST) { /* It is possible that we process SYN packets from backlog, - * so we need to make sure to disable BH right there. + * so we need to make sure to disable BH and RCU right there. */ + rcu_read_lock(); local_bh_disable(); acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0; local_bh_enable(); + rcu_read_unlock(); if (!acceptable) return 1; consume_skb(skb); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 4cf2f7bb2802..47e08c1b5bc3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6009,11 +6009,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (th->fin) goto discard; /* It is possible that we process SYN packets from backlog, - * so we need to make sure to disable BH right there. + * so we need to make sure to disable BH and RCU right there. */ + rcu_read_lock(); local_bh_disable(); acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0; local_bh_enable(); + rcu_read_unlock(); if (!acceptable) return 1; -- cgit v1.2.3 From aeadd93f2b0a609f603ac33e574b97a9832d1b90 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 22 Sep 2018 16:46:48 +0300 Subject: net: sched: act_ipt: check for underflow in __tcf_ipt_init() If "td->u.target_size" is larger than sizeof(struct xt_entry_target) we return -EINVAL. But we don't check whether it's smaller than sizeof(struct xt_entry_target) and that could lead to an out of bounds read. Fixes: 7ba699c604ab ("[NET_SCHED]: Convert actions from rtnetlink to new netlink API") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/sched/act_ipt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 23273b5303fd..8525de811616 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -135,7 +135,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, } td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); - if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) { + if (nla_len(tb[TCA_IPT_TARG]) != td->u.target_size) { if (exists) tcf_idr_release(*a, bind); else -- cgit v1.2.3 From d949cfedbcbab4e91590576cbace2671924ad69c Mon Sep 17 00:00:00 2001 From: LUU Duc Canh Date: Wed, 26 Sep 2018 22:28:52 +0200 Subject: tipc: ignore STATE_MSG on wrong link session The initial session number when a link is created is based on a random value, taken from struct tipc_net->random. It is then incremented for each link reset to avoid mixing protocol messages from different link sessions. However, when a bearer is reset all its links are deleted, and will later be re-created using the same random value as the first time. This means that if the link never went down between creation and deletion we will still sometimes have two subsequent sessions with the same session number. In virtual environments with potentially long transmission times this has turned out to be a real problem. We now fix this by randomizing the session number each time a link is created. With a session number size of 16 bits this gives a risk of session collision of 1/64k. To reduce this further, we also introduce a sanity check on the very first STATE message arriving at a link. If this has an acknowledge value differing from 0, which is logically impossible, we ignore the message. The final risk for session collision is hence reduced to 1/4G, which should be sufficient. Signed-off-by: LUU Duc Canh Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 3 +++ net/tipc/node.c | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 4ed650ce6e61..fb886b525d95 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1516,6 +1516,9 @@ bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr) return false; if (session != curr_session) return false; + /* Extra sanity check */ + if (!link_is_up(l) && msg_ack(hdr)) + return false; if (!(l->peer_caps & TIPC_LINK_PROTO_SEQNO)) return true; /* Accept only STATE with new sequence number */ diff --git a/net/tipc/node.c b/net/tipc/node.c index b0ee25f1f2e6..2afc4f8c37a7 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -913,6 +913,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, bool reset = true; char *if_name; unsigned long intv; + u16 session; *dupl_addr = false; *respond = false; @@ -999,9 +1000,10 @@ void tipc_node_check_dest(struct net *net, u32 addr, goto exit; if_name = strchr(b->name, ':') + 1; + get_random_bytes(&session, sizeof(u16)); if (!tipc_link_create(net, if_name, b->identity, b->tolerance, b->net_plane, b->mtu, b->priority, - b->window, mod(tipc_net(net)->random), + b->window, session, tipc_own_addr(net), addr, peer_id, n->capabilities, tipc_bc_sndlink(n->net), n->bc_entry.link, @@ -1625,7 +1627,6 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, tipc_link_create_dummy_tnl_msg(l, xmitq); n->failover_sent = true; } - /* If pkts arrive out of order, use lowest calculated syncpt */ if (less(syncpt, n->sync_point)) n->sync_point = syncpt; -- cgit v1.2.3 From 7f6d6558ae44bc193eb28df3617c364d3bb6df39 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Fri, 28 Sep 2018 14:55:34 -0300 Subject: Revert "openvswitch: Fix template leak in error cases." This reverts commit 90c7afc96cbbd77f44094b5b651261968e97de67. When the commit was merged, the code used nf_ct_put() to free the entry, but later on commit 76644232e612 ("openvswitch: Free tmpl with tmpl_free.") replaced that with nf_ct_tmpl_free which is a more appropriate. Now the original problem is removed. Then 44d6e2f27328 ("net: Replace NF_CT_ASSERT() with WARN_ON().") replaced a debug assert with a WARN_ON() which is trigged now. Signed-off-by: Flavio Leitner Acked-by: Joe Stringer Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 86a75105af1a..0aeb34c6389d 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1624,10 +1624,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, OVS_NLERR(log, "Failed to allocate conntrack template"); return -ENOMEM; } - - __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); - nf_conntrack_get(&ct_info.ct->ct_general); - if (helper) { err = ovs_ct_add_helper(&ct_info, helper, key, log); if (err) @@ -1639,6 +1635,8 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, if (err) goto err_free_ct; + __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); + nf_conntrack_get(&ct_info.ct->ct_general); return 0; err_free_ct: __ovs_ct_free_action(&ct_info); -- cgit v1.2.3 From 893626d6a353d1356528f94e081246ecf233d77a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 28 Sep 2018 12:28:41 -0700 Subject: rtnetlink: Fail dump if target netnsid is invalid Link dumps can return results from a target namespace. If the namespace id is invalid, then the dump request should fail if get_target_net fails rather than continuing with a dump of the current namespace. Fixes: 79e1ad148c844 ("rtnetlink: use netnsid to query interface") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 63ce2283a456..7f37fe9c65a5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1898,10 +1898,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (tb[IFLA_IF_NETNSID]) { netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); tgt_net = get_target_net(skb->sk, netnsid); - if (IS_ERR(tgt_net)) { - tgt_net = net; - netnsid = -1; - } + if (IS_ERR(tgt_net)) + return PTR_ERR(tgt_net); } if (tb[IFLA_EXT_MASK]) -- cgit v1.2.3 From 6fe9487892b32cb1c8b8b0d552ed7222a527fe30 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 28 Sep 2018 16:26:08 -0400 Subject: bond: take rcu lock in netpoll_send_skb_on_dev The bonding driver lacks the rcu lock when it calls down into netdev_lower_get_next_private_rcu from bond_poll_controller, which results in a trace like: WARNING: CPU: 2 PID: 179 at net/core/dev.c:6567 netdev_lower_get_next_private_rcu+0x34/0x40 CPU: 2 PID: 179 Comm: kworker/u16:15 Not tainted 4.19.0-rc5-backup+ #1 Workqueue: bond0 bond_mii_monitor RIP: 0010:netdev_lower_get_next_private_rcu+0x34/0x40 Code: 48 89 fb e8 fe 29 63 ff 85 c0 74 1e 48 8b 45 00 48 81 c3 c0 00 00 00 48 8b 00 48 39 d8 74 0f 48 89 45 00 48 8b 40 f8 5b 5d c3 <0f> 0b eb de 31 c0 eb f5 0f 1f 40 00 0f 1f 44 00 00 48 8> RSP: 0018:ffffc9000087fa68 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff880429614560 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 00000000ffffffff RDI: ffffffffa184ada0 RBP: ffffc9000087fa80 R08: 0000000000000001 R09: 0000000000000000 R10: ffffc9000087f9f0 R11: ffff880429798040 R12: ffff8804289d5980 R13: ffffffffa1511f60 R14: 00000000000000c8 R15: 00000000ffffffff FS: 0000000000000000(0000) GS:ffff88042f880000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f4b78fce180 CR3: 000000018180f006 CR4: 00000000001606e0 Call Trace: bond_poll_controller+0x52/0x170 netpoll_poll_dev+0x79/0x290 netpoll_send_skb_on_dev+0x158/0x2c0 netpoll_send_udp+0x2d5/0x430 write_ext_msg+0x1e0/0x210 console_unlock+0x3c4/0x630 vprintk_emit+0xfa/0x2f0 printk+0x52/0x6e ? __netdev_printk+0x12b/0x220 netdev_info+0x64/0x80 ? bond_3ad_set_carrier+0xe9/0x180 bond_select_active_slave+0x1fc/0x310 bond_mii_monitor+0x709/0x9b0 process_one_work+0x221/0x5e0 worker_thread+0x4f/0x3b0 kthread+0x100/0x140 ? process_one_work+0x5e0/0x5e0 ? kthread_delayed_work_timer_fn+0x90/0x90 ret_from_fork+0x24/0x30 We're also doing rcu dereferences a layer up in netpoll_send_skb_on_dev before we call down into netpoll_poll_dev, so just take the lock there. Suggested-by: Cong Wang Signed-off-by: Dave Jones Signed-off-by: David S. Miller --- net/core/netpoll.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 3ae899805f8b..de1d1ba92f2d 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -312,6 +312,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo; + rcu_read_lock_bh(); lockdep_assert_irqs_disabled(); npinfo = rcu_dereference_bh(np->dev->npinfo); @@ -356,6 +357,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } + rcu_read_unlock_bh(); } EXPORT_SYMBOL(netpoll_send_skb_on_dev); -- cgit v1.2.3 From 06e55addd3f40b5294e448c2cb7605ca4f28c2e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Sep 2018 14:51:47 -0700 Subject: tun: remove unused parameters tun_napi_disable() and tun_napi_del() do not need a pointer to the tun_struct Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/tun.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e2648b5a3861..71d10fb59849 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -324,13 +324,13 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, } } -static void tun_napi_disable(struct tun_struct *tun, struct tun_file *tfile) +static void tun_napi_disable(struct tun_file *tfile) { if (tfile->napi_enabled) napi_disable(&tfile->napi); } -static void tun_napi_del(struct tun_struct *tun, struct tun_file *tfile) +static void tun_napi_del(struct tun_file *tfile) { if (tfile->napi_enabled) netif_napi_del(&tfile->napi); @@ -690,8 +690,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun = rtnl_dereference(tfile->tun); if (tun && clean) { - tun_napi_disable(tun, tfile); - tun_napi_del(tun, tfile); + tun_napi_disable(tfile); + tun_napi_del(tfile); } if (tun && !tfile->detached) { @@ -758,7 +758,7 @@ static void tun_detach_all(struct net_device *dev) for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); BUG_ON(!tfile); - tun_napi_disable(tun, tfile); + tun_napi_disable(tfile); tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; tfile->socket.sk->sk_data_ready(tfile->socket.sk); RCU_INIT_POINTER(tfile->tun, NULL); @@ -774,7 +774,7 @@ static void tun_detach_all(struct net_device *dev) synchronize_net(); for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); - tun_napi_del(tun, tfile); + tun_napi_del(tfile); /* Drop read queue */ tun_queue_purge(tfile); xdp_rxq_info_unreg(&tfile->xdp_rxq); -- cgit v1.2.3 From c7256f579f8302ce2c038181c30060d0b40017b2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Sep 2018 14:51:48 -0700 Subject: tun: initialize napi_mutex unconditionally This is the first part to fix following syzbot report : console output: https://syzkaller.appspot.com/x/log.txt?x=145378e6400000 kernel config: https://syzkaller.appspot.com/x/.config?x=443816db871edd66 dashboard link: https://syzkaller.appspot.com/bug?extid=e662df0ac1d753b57e80 Following patch is fixing the race condition, but it seems safer to initialize this mutex at tfile creation anyway. Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") Signed-off-by: Eric Dumazet Reported-by: syzbot+e662df0ac1d753b57e80@syzkaller.appspotmail.com Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 71d10fb59849..729686babbf3 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -320,7 +320,6 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll, NAPI_POLL_WEIGHT); napi_enable(&tfile->napi); - mutex_init(&tfile->napi_mutex); } } @@ -3199,6 +3198,7 @@ static int tun_chr_open(struct inode *inode, struct file * file) return -ENOMEM; } + mutex_init(&tfile->napi_mutex); RCU_INIT_POINTER(tfile->tun, NULL); tfile->flags = 0; tfile->ifindex = 0; -- cgit v1.2.3 From af3fb24eecb2c59246e03c99386037fd5ad84ffd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Sep 2018 14:51:49 -0700 Subject: tun: napi flags belong to tfile Since tun->flags might be shared by multiple tfile structures, it is better to make sure tun_get_user() is using the flags for the current tfile. Presence of the READ_ONCE() in tun_napi_frags_enabled() gave a hint of what could happen, but we need something stronger to please syzbot. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 13647 Comm: syz-executor5 Not tainted 4.19.0-rc5+ #59 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:dev_gro_receive+0x132/0x2720 net/core/dev.c:5427 Code: 48 c1 ea 03 80 3c 02 00 0f 85 6e 20 00 00 48 b8 00 00 00 00 00 fc ff df 4d 8b 6e 10 49 8d bd d0 00 00 00 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 59 20 00 00 4d 8b a5 d0 00 00 00 31 ff 41 81 e4 RSP: 0018:ffff8801c400f410 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff8618d325 RDX: 000000000000001a RSI: ffffffff86189f97 RDI: 00000000000000d0 RBP: ffff8801c400f608 R08: ffff8801c8fb4300 R09: 0000000000000000 R10: ffffed0038801ed7 R11: 0000000000000003 R12: ffff8801d327d358 R13: 0000000000000000 R14: ffff8801c16dd8c0 R15: 0000000000000004 FS: 00007fe003615700(0000) GS:ffff8801dac00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fe1f3c43db8 CR3: 00000001bebb2000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: napi_gro_frags+0x3f4/0xc90 net/core/dev.c:5715 tun_get_user+0x31d5/0x42a0 drivers/net/tun.c:1922 tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:1967 call_write_iter include/linux/fs.h:1808 [inline] new_sync_write fs/read_write.c:474 [inline] __vfs_write+0x6b8/0x9f0 fs/read_write.c:487 vfs_write+0x1fc/0x560 fs/read_write.c:549 ksys_write+0x101/0x260 fs/read_write.c:598 __do_sys_write fs/read_write.c:610 [inline] __se_sys_write fs/read_write.c:607 [inline] __x64_sys_write+0x73/0xb0 fs/read_write.c:607 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457579 Code: 1d b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fe003614c78 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457579 RDX: 0000000000000012 RSI: 0000000020000000 RDI: 000000000000000a RBP: 000000000072c040 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe0036156d4 R13: 00000000004c5574 R14: 00000000004d8e98 R15: 00000000ffffffff Modules linked in: RIP: 0010:dev_gro_receive+0x132/0x2720 net/core/dev.c:5427 Code: 48 c1 ea 03 80 3c 02 00 0f 85 6e 20 00 00 48 b8 00 00 00 00 00 fc ff df 4d 8b 6e 10 49 8d bd d0 00 00 00 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 59 20 00 00 4d 8b a5 d0 00 00 00 31 ff 41 81 e4 RSP: 0018:ffff8801c400f410 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffff8618d325 RDX: 000000000000001a RSI: ffffffff86189f97 RDI: 00000000000000d0 RBP: ffff8801c400f608 R08: ffff8801c8fb4300 R09: 0000000000000000 R10: ffffed0038801ed7 R11: 0000000000000003 R12: ffff8801d327d358 R13: 0000000000000000 R14: ffff8801c16dd8c0 R15: 0000000000000004 FS: 00007fe003615700(0000) GS:ffff8801dac00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fe1f3c43db8 CR3: 00000001bebb2000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- drivers/net/tun.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 729686babbf3..50e9cc19023a 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -181,6 +181,7 @@ struct tun_file { }; struct napi_struct napi; bool napi_enabled; + bool napi_frags_enabled; struct mutex napi_mutex; /* Protects access to the above napi */ struct list_head next; struct tun_struct *detached; @@ -313,9 +314,10 @@ static int tun_napi_poll(struct napi_struct *napi, int budget) } static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, - bool napi_en) + bool napi_en, bool napi_frags) { tfile->napi_enabled = napi_en; + tfile->napi_frags_enabled = napi_en && napi_frags; if (napi_en) { netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll, NAPI_POLL_WEIGHT); @@ -335,9 +337,9 @@ static void tun_napi_del(struct tun_file *tfile) netif_napi_del(&tfile->napi); } -static bool tun_napi_frags_enabled(const struct tun_struct *tun) +static bool tun_napi_frags_enabled(const struct tun_file *tfile) { - return READ_ONCE(tun->flags) & IFF_NAPI_FRAGS; + return tfile->napi_frags_enabled; } #ifdef CONFIG_TUN_VNET_CROSS_LE @@ -792,7 +794,7 @@ static void tun_detach_all(struct net_device *dev) } static int tun_attach(struct tun_struct *tun, struct file *file, - bool skip_filter, bool napi) + bool skip_filter, bool napi, bool napi_frags) { struct tun_file *tfile = file->private_data; struct net_device *dev = tun->dev; @@ -865,7 +867,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, tun_enable_queue(tfile); } else { sock_hold(&tfile->sk); - tun_napi_init(tun, tfile, napi); + tun_napi_init(tun, tfile, napi, napi_frags); } tun_set_real_num_queues(tun); @@ -1708,7 +1710,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, int err; u32 rxhash = 0; int skb_xdp = 1; - bool frags = tun_napi_frags_enabled(tun); + bool frags = tun_napi_frags_enabled(tfile); if (!(tun->dev->flags & IFF_UP)) return -EIO; @@ -2533,7 +2535,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) return err; err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER, - ifr->ifr_flags & IFF_NAPI); + ifr->ifr_flags & IFF_NAPI, + ifr->ifr_flags & IFF_NAPI_FRAGS); if (err < 0) return err; @@ -2631,7 +2634,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) (ifr->ifr_flags & TUN_FEATURES); INIT_LIST_HEAD(&tun->disabled); - err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI); + err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI, + ifr->ifr_flags & IFF_NAPI_FRAGS); if (err < 0) goto err_free_flow; @@ -2780,7 +2784,8 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr) ret = security_tun_dev_attach_queue(tun->security); if (ret < 0) goto unlock; - ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI); + ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI, + tun->flags & IFF_NAPI_FRAGS); } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { tun = rtnl_dereference(tfile->tun); if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached) -- cgit v1.2.3 From ad5f97faff4231e72b96bd96adbe1b6e977a9b86 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 28 Sep 2018 23:51:54 +0200 Subject: r8169: fix network stalls due to missing bit TXCFG_AUTO_FIFO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some of the chip-specific hw_start functions set bit TXCFG_AUTO_FIFO in register TxConfig. The original patch changed the order of some calls resulting in these changes being overwritten by rtl_set_tx_config_registers() in rtl_hw_start(). This eventually resulted in network stalls especially under high load. Analyzing the chip-specific hw_start functions all chip version from 34, with the exception of version 39, need this bit set. This patch moves setting this bit to rtl_set_tx_config_registers(). Fixes: 4fd48c4ac0a0 ("r8169: move common initializations to tp->hw_start") Reported-by: Ortwin Glück Reported-by: David Arendt Root-caused-by: Maciej S. Szmigiero Tested-by: Tony Atkinson Tested-by: David Arendt Tested-by: Ortwin Glück Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index d6b53f53909a..a94b874982dc 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -4536,9 +4536,14 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp) static void rtl_set_tx_config_registers(struct rtl8169_private *tp) { - /* Set DMA burst size and Interframe Gap Time */ - RTL_W32(tp, TxConfig, (TX_DMA_BURST << TxDMAShift) | - (InterFrameGap << TxInterFrameGapShift)); + u32 val = TX_DMA_BURST << TxDMAShift | + InterFrameGap << TxInterFrameGapShift; + + if (tp->mac_version >= RTL_GIGA_MAC_VER_34 && + tp->mac_version != RTL_GIGA_MAC_VER_39) + val |= TXCFG_AUTO_FIFO; + + RTL_W32(tp, TxConfig, val); } static void rtl_set_rx_max_size(struct rtl8169_private *tp) @@ -5033,7 +5038,6 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) rtl_disable_clock_request(tp); - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); /* Adjust EEE LED frequency */ @@ -5067,7 +5071,6 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp) rtl_disable_clock_request(tp); - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) | PFM_EN); RTL_W32(tp, MISC, RTL_R32(tp, MISC) | PWM_EN); @@ -5112,8 +5115,6 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp) static void rtl_hw_start_8168g(struct rtl8169_private *tp) { - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); - rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x080002, ERIAR_EXGMAC); rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC); rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x48, ERIAR_EXGMAC); @@ -5211,8 +5212,6 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp) rtl_hw_aspm_clkreq_enable(tp, false); rtl_ephy_init(tp, e_info_8168h_1, ARRAY_SIZE(e_info_8168h_1)); - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); - rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC); rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x38, ERIAR_EXGMAC); rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x48, ERIAR_EXGMAC); @@ -5295,8 +5294,6 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp) { rtl8168ep_stop_cmac(tp); - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); - rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC); rtl_eri_write(tp, 0xcc, ERIAR_MASK_0001, 0x2f, ERIAR_EXGMAC); rtl_eri_write(tp, 0xd0, ERIAR_MASK_0001, 0x5f, ERIAR_EXGMAC); @@ -5618,7 +5615,6 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp) /* Force LAN exit from ASPM if Rx/Tx are not idle */ RTL_W32(tp, FuncEvent, RTL_R32(tp, FuncEvent) | 0x002800); - RTL_W32(tp, TxConfig, RTL_R32(tp, TxConfig) | TXCFG_AUTO_FIFO); RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402)); -- cgit v1.2.3 From 657ade07df72847f591ccdb36bd9b91ed0edbac3 Mon Sep 17 00:00:00 2001 From: Rickard x Andersson Date: Tue, 2 Oct 2018 14:49:32 +0200 Subject: net: fec: fix rare tx timeout During certain heavy network loads TX could time out with TX ring dump. TX is sometimes never restarted after reaching "tx_stop_threshold" because function "fec_enet_tx_queue" only tests the first queue. In addition the TX timeout callback function failed to recover because it also operated only on the first queue. Signed-off-by: Rickard x Andersson Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 2708297e7795..bf9b9fd6d2a0 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1158,7 +1158,7 @@ static void fec_enet_timeout_work(struct work_struct *work) napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); } @@ -1273,7 +1273,7 @@ skb_done: /* Since we have freed up a buffer, the ring is no longer full */ - if (netif_queue_stopped(ndev)) { + if (netif_tx_queue_stopped(nq)) { entries_free = fec_enet_get_free_txdesc_num(txq); if (entries_free >= txq->tx_wake_threshold) netif_tx_wake_queue(nq); @@ -1746,7 +1746,7 @@ static void fec_enet_adjust_link(struct net_device *ndev) napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); } @@ -2247,7 +2247,7 @@ static int fec_enet_set_pauseparam(struct net_device *ndev, napi_disable(&fep->napi); netif_tx_lock_bh(ndev); fec_restart(ndev); - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); netif_tx_unlock_bh(ndev); napi_enable(&fep->napi); } -- cgit v1.2.3 From fe3a83af6a50199bf250fa331e94216912f79395 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 2 Oct 2018 14:23:45 +0100 Subject: declance: Fix continuation with the adapter identification message Fix a commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") regression with the `declance' driver, which caused the adapter identification message to be split between two lines, e.g.: declance.c: v0.011 by Linux MIPS DECstation task force tc6: PMAD-AA , addr = 08:00:2b:1b:2a:6a, irq = 14 tc6: registered as eth0. Address that properly, by printing identification with a single call, making the messages now look like: declance.c: v0.011 by Linux MIPS DECstation task force tc6: PMAD-AA, addr = 08:00:2b:1b:2a:6a, irq = 14 tc6: registered as eth0. Signed-off-by: Maciej W. Rozycki Fixes: 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/declance.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index 116997a8b593..00332a1ea84b 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -1031,6 +1031,7 @@ static int dec_lance_probe(struct device *bdev, const int type) int i, ret; unsigned long esar_base; unsigned char *esar; + const char *desc; if (dec_lance_debug && version_printed++ == 0) printk(version); @@ -1216,19 +1217,20 @@ static int dec_lance_probe(struct device *bdev, const int type) */ switch (type) { case ASIC_LANCE: - printk("%s: IOASIC onboard LANCE", name); + desc = "IOASIC onboard LANCE"; break; case PMAD_LANCE: - printk("%s: PMAD-AA", name); + desc = "PMAD-AA"; break; case PMAX_LANCE: - printk("%s: PMAX onboard LANCE", name); + desc = "PMAX onboard LANCE"; break; } for (i = 0; i < 6; i++) dev->dev_addr[i] = esar[i * 4]; - printk(", addr = %pM, irq = %d\n", dev->dev_addr, dev->irq); + printk("%s: %s, addr = %pM, irq = %d\n", + name, desc, dev->dev_addr, dev->irq); dev->netdev_ops = &lance_netdev_ops; dev->watchdog_timeo = 5*HZ; -- cgit v1.2.3 From ff58e2df62ce29d0552278c290ae494b30fe0c6f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 2 Oct 2018 10:10:14 -0700 Subject: nfp: avoid soft lockups under control message storm When FW floods the driver with control messages try to exit the cmsg processing loop every now and then to avoid soft lockups. Cmsg processing is generally very lightweight so 512 seems like a reasonable budget, which should not be exceeded under normal conditions. Fixes: 77ece8d5f196 ("nfp: add control vNIC datapath") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Tested-by: Pieter Jansen van Vuuren Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 8ed38fd5a852..c6d29fdbb880 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2077,14 +2077,17 @@ nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp, return true; } -static void nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) +static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) { struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; struct nfp_net *nn = r_vec->nfp_net; struct nfp_net_dp *dp = &nn->dp; + unsigned int budget = 512; - while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring)) + while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--) continue; + + return budget; } static void nfp_ctrl_poll(unsigned long arg) @@ -2096,9 +2099,13 @@ static void nfp_ctrl_poll(unsigned long arg) __nfp_ctrl_tx_queued(r_vec); spin_unlock_bh(&r_vec->lock); - nfp_ctrl_rx(r_vec); - - nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + if (nfp_ctrl_rx(r_vec)) { + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + } else { + tasklet_schedule(&r_vec->tasklet); + nn_dp_warn(&r_vec->nfp_net->dp, + "control message budget exceeded!\n"); + } } /* Setup and Configuration -- cgit v1.2.3 From 2ab2ddd301a22ca3c5f0b743593e4ad2953dfa53 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Oct 2018 12:35:05 -0700 Subject: inet: make sure to grab rcu_read_lock before using ireq->ireq_opt Timer handlers do not imply rcu_read_lock(), so my recent fix triggered a LOCKDEP warning when SYNACK is retransmit. Lets add rcu_read_lock()/rcu_read_unlock() pairs around ireq->ireq_opt usages instead of guessing what is done by callers, since it is not worth the pain. Get rid of ireq_opt_deref() helper since it hides the logic without real benefit, since it is now a standard rcu_dereference(). Fixes: 1ad98e9d1bdf ("tcp/dccp: fix lockdep issue when SYN is backlogged") Signed-off-by: Eric Dumazet Reported-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/inet_sock.h | 5 ----- net/dccp/ipv4.c | 4 +++- net/ipv4/inet_connection_sock.c | 5 ++++- net/ipv4/tcp_ipv4.c | 4 +++- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index a8cd5cf9ff5b..a80fd0ac4563 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -130,11 +130,6 @@ static inline int inet_request_bound_dev_if(const struct sock *sk, return sk->sk_bound_dev_if; } -static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) -{ - return rcu_dereference(ireq->ireq_opt); -} - struct inet_cork { unsigned int flags; __be32 addr; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b08feb219b44..8e08cea6f178 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -493,9 +493,11 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + rcu_read_lock(); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq_opt_deref(ireq)); + rcu_dereference(ireq->ireq_opt)); + rcu_read_unlock(); err = net_xmit_eval(err); } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index dfd5009f96ef..15e7f7915a21 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -544,7 +544,8 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, struct ip_options_rcu *opt; struct rtable *rt; - opt = ireq_opt_deref(ireq); + rcu_read_lock(); + opt = rcu_dereference(ireq->ireq_opt); flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, @@ -558,11 +559,13 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, goto no_route; if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; + rcu_read_unlock(); return &rt->dst; route_err: ip_rt_put(rt); no_route: + rcu_read_unlock(); __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 44c09eddbb78..cd426313a298 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -943,9 +943,11 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + rcu_read_lock(); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq_opt_deref(ireq)); + rcu_dereference(ireq->ireq_opt)); + rcu_read_unlock(); err = net_xmit_eval(err); } -- cgit v1.2.3 From 0f3b914c9cfcd7bbedd445dc4ac5dd999fa213c2 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Tue, 2 Oct 2018 12:14:34 -0700 Subject: bonding: fix warning message RX queue config for bonding master could be different from its slave device(s). With the commit 6a9e461f6fe4 ("bonding: pass link-local packets to bonding master also."), the packet is reinjected into stack with skb->dev as bonding master. This potentially triggers the message: "bondX received packet on queue Y, but number of RX queues is Z" whenever the queue that packet is received on is higher than the numrxqueues on bonding master (Y > Z). Fixes: 6a9e461f6fe4 ("bonding: pass link-local packets to bonding master also.") Reported-by: John Sperbeck Signed-off-by: Eric Dumazet Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c05c01a00755..ee28ec9e0aba 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1187,6 +1187,7 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb) if (nskb) { nskb->dev = bond->dev; + nskb->queue_mapping = 0; netif_rx(nskb); } return RX_HANDLER_PASS; -- cgit v1.2.3 From 0e1d6eca5113858ed2caea61a5adc03c595f6096 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Oct 2018 15:47:35 -0700 Subject: rtnl: limit IFLA_NUM_TX_QUEUES and IFLA_NUM_RX_QUEUES to 4096 We have an impressive number of syzkaller bugs that are linked to the fact that syzbot was able to create a networking device with millions of TX (or RX) queues. Let's limit the number of RX/TX queues to 4096, this really should cover all known cases. A separate patch will add various cond_resched() in the loops handling sysfs entries at device creation and dismantle. Tested: lpaa6:~# ip link add gre-4097 numtxqueues 4097 numrxqueues 4097 type ip6gretap RTNETLINK answers: Invalid argument lpaa6:~# time ip link add gre-4096 numtxqueues 4096 numrxqueues 4096 type ip6gretap real 0m0.180s user 0m0.000s sys 0m0.107s Fixes: 76ff5cc91935 ("rtnl: allow to specify number of rx and tx queues on device creation") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7f37fe9c65a5..448703312fed 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2835,6 +2835,12 @@ struct net_device *rtnl_create_link(struct net *net, else if (ops->get_num_rx_queues) num_rx_queues = ops->get_num_rx_queues(); + if (num_tx_queues < 1 || num_tx_queues > 4096) + return ERR_PTR(-EINVAL); + + if (num_rx_queues < 1 || num_rx_queues > 4096) + return ERR_PTR(-EINVAL); + dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type, ops->setup, num_tx_queues, num_rx_queues); if (!dev) -- cgit v1.2.3 From 45ec318578c0c22a11f5b9927d064418e1ab1905 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 2 Oct 2018 16:52:03 -0700 Subject: net: systemport: Fix wake-up interrupt race during resume The AON_PM_L2 is normally used to trigger and identify the source of a wake-up event. Since the RX_SYS clock is no longer turned off, we also have an interrupt being sent to the SYSTEMPORT INTRL_2_0 controller, and that interrupt remains active up until the magic packet detector is disabled which happens much later during the driver resumption. The race happens if we have a CPU that is entering the SYSTEMPORT INTRL2_0 handler during resume, and another CPU has managed to clear the wake-up interrupt during bcm_sysport_resume_from_wol(). In that case, we have the first CPU stuck in the interrupt handler with an interrupt cause that has been cleared under its feet, and so we keep returning IRQ_NONE and we never make any progress. This was not a problem before because we would always turn off the RX_SYS clock during WoL, so the SYSTEMPORT INTRL2_0 would also be turned off as well, thus not latching the interrupt. The fix is to make sure we do not enable either the MPD or BRCM_TAG_MATCH interrupts since those are redundant with what the AON_PM_L2 interrupt controller already processes and they would cause such a race to occur. Fixes: bb9051a2b230 ("net: systemport: Add support for WAKE_FILTER") Fixes: 83e82f4c706b ("net: systemport: add Wake-on-LAN support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 147045757b10..c57238fce863 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1069,9 +1069,6 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv) { u32 reg; - /* Stop monitoring MPD interrupt */ - intrl2_0_mask_set(priv, INTRL2_0_MPD | INTRL2_0_BRCM_MATCH_TAG); - /* Disable RXCHK, active filters and Broadcom tag matching */ reg = rxchk_readl(priv, RXCHK_CONTROL); reg &= ~(RXCHK_BRCM_TAG_MATCH_MASK << @@ -1081,6 +1078,17 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv) /* Clear the MagicPacket detection logic */ mpd_enable_set(priv, false); + reg = intrl2_0_readl(priv, INTRL2_CPU_STATUS); + if (reg & INTRL2_0_MPD) + netdev_info(priv->netdev, "Wake-on-LAN (MPD) interrupt!\n"); + + if (reg & INTRL2_0_BRCM_MATCH_TAG) { + reg = rxchk_readl(priv, RXCHK_BRCM_TAG_MATCH_STATUS) & + RXCHK_BRCM_TAG_MATCH_MASK; + netdev_info(priv->netdev, + "Wake-on-LAN (filters 0x%02x) interrupt!\n", reg); + } + netif_dbg(priv, wol, priv->netdev, "resumed from WOL\n"); } @@ -1105,7 +1113,6 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id) struct bcm_sysport_priv *priv = netdev_priv(dev); struct bcm_sysport_tx_ring *txr; unsigned int ring, ring_bit; - u32 reg; priv->irq0_stat = intrl2_0_readl(priv, INTRL2_CPU_STATUS) & ~intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS); @@ -1131,16 +1138,6 @@ static irqreturn_t bcm_sysport_rx_isr(int irq, void *dev_id) if (priv->irq0_stat & INTRL2_0_TX_RING_FULL) bcm_sysport_tx_reclaim_all(priv); - if (priv->irq0_stat & INTRL2_0_MPD) - netdev_info(priv->netdev, "Wake-on-LAN (MPD) interrupt!\n"); - - if (priv->irq0_stat & INTRL2_0_BRCM_MATCH_TAG) { - reg = rxchk_readl(priv, RXCHK_BRCM_TAG_MATCH_STATUS) & - RXCHK_BRCM_TAG_MATCH_MASK; - netdev_info(priv->netdev, - "Wake-on-LAN (filters 0x%02x) interrupt!\n", reg); - } - if (!priv->is_lite) goto out; @@ -2641,9 +2638,6 @@ static int bcm_sysport_suspend_to_wol(struct bcm_sysport_priv *priv) /* UniMAC receive needs to be turned on */ umac_enable_set(priv, CMD_RX_EN, 1); - /* Enable the interrupt wake-up source */ - intrl2_0_mask_clear(priv, INTRL2_0_MPD | INTRL2_0_BRCM_MATCH_TAG); - netif_dbg(priv, wol, ndev, "entered WOL mode\n"); return 0; -- cgit v1.2.3 From a07f388e2cde2be74b263f85df6f672fea0305a1 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Tue, 2 Oct 2018 18:52:01 -0600 Subject: net: qualcomm: rmnet: Skip processing loopback packets RMNET RX handler was processing invalid packets that were originally sent on the real device and were looped back via dev_loopback_xmit(). This was detected using syzkaller. Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 7fd86d40a337..6908b26feb9e 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -189,6 +189,9 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) if (!skb) goto done; + if (skb->pkt_type == PACKET_LOOPBACK) + return RX_HANDLER_PASS; + dev = skb->dev; port = rmnet_get_port(dev); -- cgit v1.2.3 From 6392ff3c8e4c23d0a09b0ae9f94feb3effed490b Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Tue, 2 Oct 2018 18:52:02 -0600 Subject: net: qualcomm: rmnet: Fix incorrect allocation flag in transmit The incoming skb needs to be reallocated in case the headroom is not sufficient to add the MAP header. This allocation needs to be atomic otherwise it results in the following splat [32805.801456] BUG: sleeping function called from invalid context [32805.841141] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [32805.904773] task: ffffffd7c5f62280 task.stack: ffffff80464a8000 [32805.910851] pc : ___might_sleep+0x180/0x188 [32805.915143] lr : ___might_sleep+0x180/0x188 [32806.131520] Call trace: [32806.134041] ___might_sleep+0x180/0x188 [32806.137980] __might_sleep+0x50/0x84 [32806.141653] __kmalloc_track_caller+0x80/0x3bc [32806.146215] __kmalloc_reserve+0x3c/0x88 [32806.150241] pskb_expand_head+0x74/0x288 [32806.154269] rmnet_egress_handler+0xb0/0x1d8 [32806.162239] rmnet_vnd_start_xmit+0xc8/0x13c [32806.166627] dev_hard_start_xmit+0x148/0x280 [32806.181181] sch_direct_xmit+0xa4/0x198 [32806.185125] __qdisc_run+0x1f8/0x310 [32806.188803] net_tx_action+0x23c/0x26c [32806.192655] __do_softirq+0x220/0x408 [32806.196420] do_softirq+0x4c/0x70 Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 6908b26feb9e..1f98d65473cf 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -147,7 +147,7 @@ static int rmnet_map_egress_handler(struct sk_buff *skb, } if (skb_headroom(skb) < required_headroom) { - if (pskb_expand_head(skb, required_headroom, 0, GFP_KERNEL)) + if (pskb_expand_head(skb, required_headroom, 0, GFP_ATOMIC)) return -ENOMEM; } -- cgit v1.2.3 From ec405641e2b73160e26ef17580d0cf28565d146c Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Tue, 2 Oct 2018 18:52:03 -0600 Subject: net: qualcomm: rmnet: Fix incorrect allocation flag in receive path The incoming skb needs to be reallocated in case the headroom is not sufficient to adjust the ethernet header. This allocation needs to be atomic otherwise it results in this splat [<600601bb>] ___might_sleep+0x185/0x1a3 [<603f6314>] ? _raw_spin_unlock_irqrestore+0x0/0x27 [<60069bb0>] ? __wake_up_common_lock+0x95/0xd1 [<600602b0>] __might_sleep+0xd7/0xe2 [<60065598>] ? enqueue_task_fair+0x112/0x209 [<600eea13>] __kmalloc_track_caller+0x5d/0x124 [<600ee9b6>] ? __kmalloc_track_caller+0x0/0x124 [<602696d5>] __kmalloc_reserve.isra.34+0x30/0x7e [<603f629b>] ? _raw_spin_lock_irqsave+0x0/0x3d [<6026b744>] pskb_expand_head+0xbf/0x310 [<6025ca6a>] rmnet_rx_handler+0x7e/0x16b [<6025c9ec>] ? rmnet_rx_handler+0x0/0x16b [<6027ad0c>] __netif_receive_skb_core+0x301/0x96f [<60033c17>] ? set_signals+0x0/0x40 [<6027bbcb>] __netif_receive_skb+0x24/0x8e Fixes: 74692caf1b0b ("net: qualcomm: rmnet: Process packets over ethernet") Signed-off-by: Sean Tranchetti Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 1f98d65473cf..11167abe5934 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -113,7 +113,7 @@ rmnet_map_ingress_handler(struct sk_buff *skb, struct sk_buff *skbn; if (skb->dev->type == ARPHRD_ETHER) { - if (pskb_expand_head(skb, ETH_HLEN, 0, GFP_KERNEL)) { + if (pskb_expand_head(skb, ETH_HLEN, 0, GFP_ATOMIC)) { kfree_skb(skb); return; } -- cgit v1.2.3 From 64199fc0a46ba211362472f7f942f900af9492fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 30 Sep 2018 11:33:39 -0700 Subject: ipv4: fix use-after-free in ip_cmsg_recv_dstaddr() Caching ip_hdr(skb) before a call to pskb_may_pull() is buggy, do not do it. Fixes: 2efd4fca703a ("ip: in cmsg IP(V6)_ORIGDSTADDR call pskb_may_pull") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reported-by: syzbot Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c0fe5ad996f2..26c36cccabdc 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -149,7 +149,6 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) { struct sockaddr_in sin; - const struct iphdr *iph = ip_hdr(skb); __be16 *ports; int end; @@ -164,7 +163,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) ports = (__be16 *)skb_transport_header(skb); sin.sin_family = AF_INET; - sin.sin_addr.s_addr = iph->daddr; + sin.sin_addr.s_addr = ip_hdr(skb)->daddr; sin.sin_port = ports[1]; memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); -- cgit v1.2.3 From 9003b369499b3320d3f16145145b729fb6ff3bd4 Mon Sep 17 00:00:00 2001 From: "Alex Xu (Hello71)" Date: Sun, 30 Sep 2018 11:06:39 -0400 Subject: r8169: always autoneg on resume This affects at least versions 25 and 33, so assume all cards are broken and just renegotiate by default. Fixes: 10bc6a6042c9 ("r8169: fix autoneg issue on resume with RTL8168E") Signed-off-by: Alex Xu (Hello71) Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index a94b874982dc..9a5e2969df61 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -4072,13 +4072,12 @@ static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp) genphy_soft_reset(dev->phydev); - /* It was reported that chip version 33 ends up with 10MBit/Half on a + /* It was reported that several chips end up with 10MBit/Half on a * 1GBit link after resuming from S3. For whatever reason the PHY on - * this chip doesn't properly start a renegotiation when soft-reset. + * these chips doesn't properly start a renegotiation when soft-reset. * Explicitly requesting a renegotiation fixes this. */ - if (tp->mac_version == RTL_GIGA_MAC_VER_33 && - dev->phydev->autoneg == AUTONEG_ENABLE) + if (dev->phydev->autoneg == AUTONEG_ENABLE) phy_restart_aneg(dev->phydev); } -- cgit v1.2.3 From 2cc543f5cd6deda27ef463686fa08c16c8c0990b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 3 Oct 2018 12:45:56 +0200 Subject: sctp: fix fall-through annotation Replace "fallthru" with a proper "fall through" annotation. This fix is part of the ongoing efforts to enabling -Wimplicit-fallthrough Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index d74d00b29942..42191ed9902b 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1048,7 +1048,7 @@ static void sctp_outq_flush_data(struct sctp_flush_ctx *ctx, if (!ctx->packet || !ctx->packet->has_cookie_echo) return; - /* fallthru */ + /* fall through */ case SCTP_STATE_ESTABLISHED: case SCTP_STATE_SHUTDOWN_PENDING: case SCTP_STATE_SHUTDOWN_RECEIVED: -- cgit v1.2.3 From 4233cfe6ec4683497d7318f55ce7617e97f2e610 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 3 Oct 2018 11:30:35 -0700 Subject: ixgbe: check return value of napi_complete_done() The NIC driver should only enable interrupts when napi_complete_done() returns true. This patch adds the check for ixgbe. Cc: stable@vger.kernel.org # 4.10+ Suggested-by: Eric Dumazet Signed-off-by: Song Liu Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index f27d73a7bf16..6cdd58d9d461 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3196,11 +3196,13 @@ int ixgbe_poll(struct napi_struct *napi, int budget) return budget; /* all work done, exit the polling mode */ - napi_complete_done(napi, work_done); - if (adapter->rx_itr_setting & 1) - ixgbe_set_itr(q_vector); - if (!test_bit(__IXGBE_DOWN, &adapter->state)) - ixgbe_irq_enable_queues(adapter, BIT_ULL(q_vector->v_idx)); + if (likely(napi_complete_done(napi, work_done))) { + if (adapter->rx_itr_setting & 1) + ixgbe_set_itr(q_vector); + if (!test_bit(__IXGBE_DOWN, &adapter->state)) + ixgbe_irq_enable_queues(adapter, + BIT_ULL(q_vector->v_idx)); + } return min(work_done, budget - 1); } -- cgit v1.2.3