From 7fe94612dd4cfcd35fe0ec87745fb31ad2be71f8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 8 Oct 2020 16:13:24 +0800 Subject: xfrm: interface: fix the priorities for ipip and ipv6 tunnels As Nicolas noticed in his case, when xfrm_interface module is installed the standard IP tunnels will break in receiving packets. This is caused by the IP tunnel handlers with a higher priority in xfrm interface processing incoming packets by xfrm_input(), which would drop the packets and return 0 instead when anything wrong happens. Rather than changing xfrm_input(), this patch is to adjust the priority for the IP tunnel handlers in xfrm interface, so that the packets would go to xfrmi's later than the others', as the others' would not drop the packets when the handlers couldn't process them. Note that IPCOMP also defines its own IPIP tunnel handler and it calls xfrm_input() as well, so we must make its priority lower than xfrmi's, which means having xfrmi loaded would still break IPCOMP. We may seek another way to fix it in xfrm_input() in the future. Reported-by: Nicolas Dichtel Tested-by: Nicolas Dichtel Fixes: da9bbf0598c9 ("xfrm: interface: support IPIP and IPIP6 tunnels processing with .cb_handler") FIxes: d7b360c2869f ("xfrm: interface: support IP6IP6 and IP6IP tunnels processing with .cb_handler") Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_tunnel.c | 4 ++-- net/ipv6/xfrm6_tunnel.c | 4 ++-- net/xfrm/xfrm_interface.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index dc19aff7c2e0..fb0648e7fb32 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -64,14 +64,14 @@ static int xfrm_tunnel_err(struct sk_buff *skb, u32 info) static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = { .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, - .priority = 3, + .priority = 4, }; #if IS_ENABLED(CONFIG_IPV6) static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = { .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, - .priority = 2, + .priority = 3, }; #endif diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 25b7ebda2fab..f696d46e6910 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -303,13 +303,13 @@ static const struct xfrm_type xfrm6_tunnel_type = { static struct xfrm6_tunnel xfrm6_tunnel_handler __read_mostly = { .handler = xfrm6_tunnel_rcv, .err_handler = xfrm6_tunnel_err, - .priority = 2, + .priority = 3, }; static struct xfrm6_tunnel xfrm46_tunnel_handler __read_mostly = { .handler = xfrm6_tunnel_rcv, .err_handler = xfrm6_tunnel_err, - .priority = 2, + .priority = 3, }; static int __net_init xfrm6_tunnel_net_init(struct net *net) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index a8f66112c52b..0bb7963b9f6b 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -830,14 +830,14 @@ static struct xfrm6_tunnel xfrmi_ipv6_handler __read_mostly = { .handler = xfrmi6_rcv_tunnel, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, - .priority = -1, + .priority = 2, }; static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = { .handler = xfrmi6_rcv_tunnel, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, - .priority = -1, + .priority = 2, }; #endif @@ -875,14 +875,14 @@ static struct xfrm_tunnel xfrmi_ipip_handler __read_mostly = { .handler = xfrmi4_rcv_tunnel, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi4_err, - .priority = -1, + .priority = 3, }; static struct xfrm_tunnel xfrmi_ipip6_handler __read_mostly = { .handler = xfrmi4_rcv_tunnel, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi4_err, - .priority = -1, + .priority = 2, }; #endif -- cgit v1.2.3 From a779d91314ca7208b7feb3ad817b62904397c56d Mon Sep 17 00:00:00 2001 From: zhuoliang zhang Date: Fri, 23 Oct 2020 09:05:35 +0200 Subject: net: xfrm: fix a race condition during allocing spi we found that the following race condition exists in xfrm_alloc_userspi flow: user thread state_hash_work thread ---- ---- xfrm_alloc_userspi() __find_acq_core() /*alloc new xfrm_state:x*/ xfrm_state_alloc() /*schedule state_hash_work thread*/ xfrm_hash_grow_check() xfrm_hash_resize() xfrm_alloc_spi /*hold lock*/ x->id.spi = htonl(spi) spin_lock_bh(&net->xfrm.xfrm_state_lock) /*waiting lock release*/ xfrm_hash_transfer() spin_lock_bh(&net->xfrm.xfrm_state_lock) /*add x into hlist:net->xfrm.state_byspi*/ hlist_add_head_rcu(&x->byspi) spin_unlock_bh(&net->xfrm.xfrm_state_lock) /*add x into hlist:net->xfrm.state_byspi 2 times*/ hlist_add_head_rcu(&x->byspi) 1. a new state x is alloced in xfrm_state_alloc() and added into the bydst hlist in __find_acq_core() on the LHS; 2. on the RHS, state_hash_work thread travels the old bydst and tranfers every xfrm_state (include x) into the new bydst hlist and new byspi hlist; 3. user thread on the LHS gets the lock and adds x into the new byspi hlist again. So the same xfrm_state (x) is added into the same list_hash (net->xfrm.state_byspi) 2 times that makes the list_hash become an inifite loop. To fix the race, x->id.spi = htonl(spi) in the xfrm_alloc_spi() is moved to the back of spin_lock_bh, sothat state_hash_work thread no longer add x which id.spi is zero into the hash_list. Fixes: f034b5d4efdf ("[XFRM]: Dynamic xfrm_state hash table sizing.") Signed-off-by: zhuoliang zhang Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index efc89a92961d..ee6ac32bb06d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2004,6 +2004,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) int err = -ENOENT; __be32 minspi = htonl(low); __be32 maxspi = htonl(high); + __be32 newspi = 0; u32 mark = x->mark.v & x->mark.m; spin_lock_bh(&x->lock); @@ -2022,21 +2023,22 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) xfrm_state_put(x0); goto unlock; } - x->id.spi = minspi; + newspi = minspi; } else { u32 spi = 0; for (h = 0; h < high-low+1; h++) { spi = low + prandom_u32()%(high-low+1); x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family); if (x0 == NULL) { - x->id.spi = htonl(spi); + newspi = htonl(spi); break; } xfrm_state_put(x0); } } - if (x->id.spi) { + if (newspi) { spin_lock_bh(&net->xfrm.xfrm_state_lock); + x->id.spi = newspi; h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); spin_unlock_bh(&net->xfrm.xfrm_state_lock); -- cgit v1.2.3 From 804fc6a2931e692f50e8e317fcb0c8887331b405 Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Mon, 19 Oct 2020 20:01:13 +0400 Subject: mac80211: fix regression where EAPOL frames were sent in plaintext When sending EAPOL frames via NL80211 they are treated as injected frames in mac80211. Due to commit 1df2bdba528b ("mac80211: never drop injected frames even if normally not allowed") these injected frames were not assigned a sta context in the function ieee80211_tx_dequeue, causing certain wireless network cards to always send EAPOL frames in plaintext. This may cause compatibility issues with some clients or APs, which for instance can cause the group key handshake to fail and in turn would cause the station to get disconnected. This commit fixes this regression by assigning a sta context in ieee80211_tx_dequeue to injected frames as well. Note that sending EAPOL frames in plaintext is not a security issue since they contain their own encryption and authentication protection. Cc: stable@vger.kernel.org Fixes: 1df2bdba528b ("mac80211: never drop injected frames even if normally not allowed") Reported-by: Thomas Deutschmann Tested-by: Christian Hesse Tested-by: Thomas Deutschmann Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20201019160113.350912-1-Mathy.Vanhoef@kuleuven.be Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8ba10a48ded4..55b41167a2be 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3619,13 +3619,14 @@ begin: tx.skb = skb; tx.sdata = vif_to_sdata(info->control.vif); - if (txq->sta && !(info->flags & IEEE80211_TX_CTL_INJECTED)) { + if (txq->sta) { tx.sta = container_of(txq->sta, struct sta_info, sta); /* * Drop unicast frames to unauthorised stations unless they are - * EAPOL frames from the local station. + * injected frames or EAPOL frames from the local station. */ - if (unlikely(ieee80211_is_data(hdr->frame_control) && + if (unlikely(!(info->flags & IEEE80211_TX_CTL_INJECTED) && + ieee80211_is_data(hdr->frame_control) && !ieee80211_vif_is_mesh(&tx.sdata->vif) && tx.sdata->vif.type != NL80211_IFTYPE_OCB && !is_multicast_ether_addr(hdr->addr1) && -- cgit v1.2.3 From 14f46c1e5108696ec1e5a129e838ecedf108c7bf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 Oct 2020 13:25:41 +0200 Subject: mac80211: fix use of skb payload instead of header When ieee80211_skb_resize() is called from ieee80211_build_hdr() the skb has no 802.11 header yet, in fact it consist only of the payload as the ethernet frame is removed. As such, we're using the payload data for ieee80211_is_mgmt(), which is of course completely wrong. This didn't really hurt us because these are always data frames, so we could only have added more tailroom than we needed if we determined it was a management frame and sdata->crypto_tx_tailroom_needed_cnt was false. However, syzbot found that of course there need not be any payload, so we're using at best uninitialized memory for the check. Fix this to pass explicitly the kind of frame that we have instead of checking there, by replacing the "bool may_encrypt" argument with an argument that can carry the three possible states - it's not going to be encrypted, it's a management frame, or it's a data frame (and then we check sdata->crypto_tx_tailroom_needed_cnt). Reported-by: syzbot+32fd1a1bfe355e93f1e2@syzkaller.appspotmail.com Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20201009132538.e1fd7f802947.I799b288466ea2815f9d4c84349fae697dca2f189@changeid Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 55b41167a2be..56a4d0d20a26 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1942,19 +1942,24 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, /* device xmit handlers */ +enum ieee80211_encrypt { + ENCRYPT_NO, + ENCRYPT_MGMT, + ENCRYPT_DATA, +}; + static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - int head_need, bool may_encrypt) + int head_need, + enum ieee80211_encrypt encrypt) { struct ieee80211_local *local = sdata->local; - struct ieee80211_hdr *hdr; bool enc_tailroom; int tail_need = 0; - hdr = (struct ieee80211_hdr *) skb->data; - enc_tailroom = may_encrypt && - (sdata->crypto_tx_tailroom_needed_cnt || - ieee80211_is_mgmt(hdr->frame_control)); + enc_tailroom = encrypt == ENCRYPT_MGMT || + (encrypt == ENCRYPT_DATA && + sdata->crypto_tx_tailroom_needed_cnt); if (enc_tailroom) { tail_need = IEEE80211_ENCRYPT_TAILROOM; @@ -1985,23 +1990,29 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_hdr *hdr; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; int headroom; - bool may_encrypt; + enum ieee80211_encrypt encrypt; - may_encrypt = !(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT); + if (info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT) + encrypt = ENCRYPT_NO; + else if (ieee80211_is_mgmt(hdr->frame_control)) + encrypt = ENCRYPT_MGMT; + else + encrypt = ENCRYPT_DATA; headroom = local->tx_headroom; - if (may_encrypt) + if (encrypt != ENCRYPT_NO) headroom += sdata->encrypt_headroom; headroom -= skb_headroom(skb); headroom = max_t(int, 0, headroom); - if (ieee80211_skb_resize(sdata, skb, headroom, may_encrypt)) { + if (ieee80211_skb_resize(sdata, skb, headroom, encrypt)) { ieee80211_free_txskb(&local->hw, skb); return; } + /* reload after potential resize */ hdr = (struct ieee80211_hdr *) skb->data; info->control.vif = &sdata->vif; @@ -2828,7 +2839,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, head_need += sdata->encrypt_headroom; head_need += local->tx_headroom; head_need = max_t(int, 0, head_need); - if (ieee80211_skb_resize(sdata, skb, head_need, true)) { + if (ieee80211_skb_resize(sdata, skb, head_need, ENCRYPT_DATA)) { ieee80211_free_txskb(&local->hw, skb); skb = NULL; return ERR_PTR(-ENOMEM); @@ -3502,7 +3513,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, if (unlikely(ieee80211_skb_resize(sdata, skb, max_t(int, extra_head + hw_headroom - skb_headroom(skb), 0), - false))) { + ENCRYPT_NO))) { kfree_skb(skb); return true; } -- cgit v1.2.3 From 9bdaf3b91efd229dd272b228e13df10310c80d19 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 Oct 2020 13:58:22 +0200 Subject: cfg80211: initialize wdev data earlier There's a race condition in the netdev registration in that NETDEV_REGISTER actually happens after the netdev is available, and so if we initialize things only there, we might get called with an uninitialized wdev through nl80211 - not using a wdev but using a netdev interface index. I found this while looking into a syzbot report, but it doesn't really seem to be related, and unfortunately there's no repro for it (yet). I can't (yet) explain how it managed to get into cfg80211_release_pmsr() from nl80211_netlink_notify() without the wdev having been initialized, as the latter only iterates the wdevs that are linked into the rdev, which even without the change here happened after init. However, looking at this, it seems fairly clear that the init needs to be done earlier, otherwise we might even re-init on a netns move, when data might still be pending. Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20201009135821.fdcbba3aad65.Ie9201d91dbcb7da32318812effdc1561aeaf4cdc@changeid Signed-off-by: Johannes Berg --- net/wireless/core.c | 57 +++++++++++++++++++++++++++----------------------- net/wireless/core.h | 5 +++-- net/wireless/nl80211.c | 3 ++- 3 files changed, 36 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 9f23923e8d29..240282c083aa 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1250,8 +1250,7 @@ void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, } EXPORT_SYMBOL(cfg80211_stop_iface); -void cfg80211_init_wdev(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) +void cfg80211_init_wdev(struct wireless_dev *wdev) { mutex_init(&wdev->mtx); INIT_LIST_HEAD(&wdev->event_list); @@ -1262,6 +1261,30 @@ void cfg80211_init_wdev(struct cfg80211_registered_device *rdev, spin_lock_init(&wdev->pmsr_lock); INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk); +#ifdef CONFIG_CFG80211_WEXT + wdev->wext.default_key = -1; + wdev->wext.default_mgmt_key = -1; + wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; +#endif + + if (wdev->wiphy->flags & WIPHY_FLAG_PS_ON_BY_DEFAULT) + wdev->ps = true; + else + wdev->ps = false; + /* allow mac80211 to determine the timeout */ + wdev->ps_timeout = -1; + + if ((wdev->iftype == NL80211_IFTYPE_STATION || + wdev->iftype == NL80211_IFTYPE_P2P_CLIENT || + wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) + wdev->netdev->priv_flags |= IFF_DONT_BRIDGE; + + INIT_WORK(&wdev->disconnect_wk, cfg80211_autodisconnect_wk); +} + +void cfg80211_register_wdev(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ /* * We get here also when the interface changes network namespaces, * as it's registered into the new one, but we don't want it to @@ -1295,6 +1318,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, switch (state) { case NETDEV_POST_INIT: SET_NETDEV_DEVTYPE(dev, &wiphy_type); + wdev->netdev = dev; + /* can only change netns with wiphy */ + dev->features |= NETIF_F_NETNS_LOCAL; + + cfg80211_init_wdev(wdev); break; case NETDEV_REGISTER: /* @@ -1302,35 +1330,12 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, * called within code protected by it when interfaces * are added with nl80211. */ - /* can only change netns with wiphy */ - dev->features |= NETIF_F_NETNS_LOCAL; - if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj, "phy80211")) { pr_err("failed to add phy80211 symlink to netdev!\n"); } - wdev->netdev = dev; -#ifdef CONFIG_CFG80211_WEXT - wdev->wext.default_key = -1; - wdev->wext.default_mgmt_key = -1; - wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; -#endif - - if (wdev->wiphy->flags & WIPHY_FLAG_PS_ON_BY_DEFAULT) - wdev->ps = true; - else - wdev->ps = false; - /* allow mac80211 to determine the timeout */ - wdev->ps_timeout = -1; - - if ((wdev->iftype == NL80211_IFTYPE_STATION || - wdev->iftype == NL80211_IFTYPE_P2P_CLIENT || - wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) - dev->priv_flags |= IFF_DONT_BRIDGE; - - INIT_WORK(&wdev->disconnect_wk, cfg80211_autodisconnect_wk); - cfg80211_init_wdev(rdev, wdev); + cfg80211_register_wdev(rdev, wdev); break; case NETDEV_GOING_DOWN: cfg80211_leave(rdev, wdev); diff --git a/net/wireless/core.h b/net/wireless/core.h index e1ec9ac8e608..e3e9686859d4 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -209,8 +209,9 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx); int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, struct net *net); -void cfg80211_init_wdev(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev); +void cfg80211_init_wdev(struct wireless_dev *wdev); +void cfg80211_register_wdev(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); static inline void wdev_lock(struct wireless_dev *wdev) __acquires(wdev) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 554796a6c6fe..a77174b99b07 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3885,7 +3885,8 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) * P2P Device and NAN do not have a netdev, so don't go * through the netdev notifier and must be added here */ - cfg80211_init_wdev(rdev, wdev); + cfg80211_init_wdev(wdev); + cfg80211_register_wdev(rdev, wdev); break; default: break; -- cgit v1.2.3 From dcd479e10a0510522a5d88b29b8f79ea3467d501 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 Oct 2020 14:17:11 +0200 Subject: mac80211: always wind down STA state When (for example) an IBSS station is pre-moved to AUTHORIZED before it's inserted, and then the insertion fails, we don't clean up the fast RX/TX states that might already have been created, since we don't go through all the state transitions again on the way down. Do that, if it hasn't been done already, when the station is freed. I considered only freeing the fast TX/RX state there, but we might add more state so it's more robust to wind down the state properly. Note that we warn if the station was ever inserted, it should have been properly cleaned up in that case, and the driver will probably not like things happening out of order. Reported-by: syzbot+2e293dbd67de2836ba42@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20201009141710.7223b322a955.I95bd08b9ad0e039c034927cce0b75beea38e059b@changeid Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'net') diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index fb4f2b9b294f..4fe284ff1ea3 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -258,6 +258,24 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, */ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) { + /* + * If we had used sta_info_pre_move_state() then we might not + * have gone through the state transitions down again, so do + * it here now (and warn if it's inserted). + * + * This will clear state such as fast TX/RX that may have been + * allocated during state transitions. + */ + while (sta->sta_state > IEEE80211_STA_NONE) { + int ret; + + WARN_ON_ONCE(test_sta_flag(sta, WLAN_STA_INSERTED)); + + ret = sta_info_move_state(sta, sta->sta_state - 1); + if (WARN_ONCE(ret, "sta_info_move_state() returned %d\n", ret)) + break; + } + if (sta->rate_ctrl) rate_control_free_sta(sta); -- cgit v1.2.3 From b1e8eb11fb9cf666d8ae36bbcf533233a504c921 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Oct 2020 18:33:08 +0200 Subject: mac80211: fix kernel-doc markups Some identifiers have different names between their prototypes and the kernel-doc markup. Others need to be fixed, as kernel-doc markups should use this format: identifier - description In the specific case of __sta_info_flush(), add a documentation for sta_info_flush(), as this one is the one used outside sta_info.c. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Johannes Berg Link: https://lore.kernel.org/r/978d35eef2dc76e21c81931804e4eaefbd6d635e.1603469755.git.mchehab+huawei@kernel.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 9 +++++---- include/net/mac80211.h | 7 ++++--- net/mac80211/sta_info.h | 9 ++++++++- 3 files changed, 17 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 661edfc8722e..d5ab8d99739f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1444,7 +1444,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy, enum cfg80211_station_type statype); /** - * enum station_info_rate_flags - bitrate info flags + * enum rate_info_flags - bitrate info flags * * Used by the driver to indicate the specific rate transmission * type for 802.11n transmissions. @@ -1517,7 +1517,7 @@ struct rate_info { }; /** - * enum station_info_rate_flags - bitrate info flags + * enum bss_param_flags - bitrate info flags * * Used by the driver to indicate the specific rate transmission * type for 802.11n transmissions. @@ -6467,7 +6467,8 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, struct ieee80211_channel *channel, gfp_t gfp); /** - * cfg80211_notify_new_candidate - notify cfg80211 of a new mesh peer candidate + * cfg80211_notify_new_peer_candidate - notify cfg80211 of a new mesh peer + * candidate * * @dev: network device * @macaddr: the MAC address of the new candidate @@ -7606,7 +7607,7 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate); void cfg80211_unregister_wdev(struct wireless_dev *wdev); /** - * struct cfg80211_ft_event - FT Information Elements + * struct cfg80211_ft_event_params - FT Information Elements * @ies: FT IEs * @ies_len: length of the FT IE in bytes * @target_ap: target AP's MAC address diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e8e295dae744..dcdba96814a2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3311,7 +3311,7 @@ enum ieee80211_roc_type { }; /** - * enum ieee80211_reconfig_complete_type - reconfig type + * enum ieee80211_reconfig_type - reconfig type * * This enum is used by the reconfig_complete() callback to indicate what * reconfiguration type was completed. @@ -6334,7 +6334,8 @@ bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw, int band, struct ieee80211_sta **sta); /** - * Sanity-check and parse the radiotap header of injected frames + * ieee80211_parse_tx_radiotap - Sanity-check and parse the radiotap header + * of injected frames * @skb: packet injected by userspace * @dev: the &struct device of this 802.11 device */ @@ -6389,7 +6390,7 @@ int ieee80211_parse_p2p_noa(const struct ieee80211_p2p_noa_attr *attr, void ieee80211_update_p2p_noa(struct ieee80211_noa_data *data, u32 tsf); /** - * ieee80211_tdls_oper - request userspace to perform a TDLS operation + * ieee80211_tdls_oper_request - request userspace to perform a TDLS operation * @vif: virtual interface * @peer: the peer's destination address * @oper: the requested TDLS operation diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 00ae81e9e1a1..7afd07636b81 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -785,7 +785,7 @@ int sta_info_init(struct ieee80211_local *local); void sta_info_stop(struct ieee80211_local *local); /** - * sta_info_flush - flush matching STA entries from the STA table + * __sta_info_flush - flush matching STA entries from the STA table * * Returns the number of removed STA entries. * @@ -794,6 +794,13 @@ void sta_info_stop(struct ieee80211_local *local); */ int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans); +/** + * sta_info_flush - flush matching STA entries from the STA table + * + * Returns the number of removed STA entries. + * + * @sdata: sdata to remove all stations from + */ static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata) { return __sta_info_flush(sdata, false); -- cgit v1.2.3 From db18d20d1cb0fde16d518fb5ccd38679f174bc04 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Fri, 9 Oct 2020 15:02:15 +0800 Subject: cfg80211: regulatory: Fix inconsistent format argument Fix follow warning: [net/wireless/reg.c:3619]: (warning) %d in format string (no. 2) requires 'int' but the argument type is 'unsigned int'. Reported-by: Hulk Robot Signed-off-by: Ye Bin Link: https://lore.kernel.org/r/20201009070215.63695-1-yebin10@huawei.com Signed-off-by: Johannes Berg --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 3dab859641e1..a04fdfb35f07 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3616,7 +3616,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) power_rule = ®_rule->power_rule; if (reg_rule->flags & NL80211_RRF_AUTO_BW) - snprintf(bw, sizeof(bw), "%d KHz, %d KHz AUTO", + snprintf(bw, sizeof(bw), "%d KHz, %u KHz AUTO", freq_range->max_bandwidth_khz, reg_get_max_bandwidth(rd, reg_rule)); else -- cgit v1.2.3 From c2f46814521113f6699a74e0a0424cbc5b305479 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Oct 2020 14:01:57 +0200 Subject: mac80211: don't require VHT elements for HE on 2.4 GHz After the previous similar bugfix there was another bug here, if no VHT elements were found we also disabled HE. Fix this to disable HE only on the 5 GHz band; on 6 GHz it was already not disabled, and on 2.4 GHz there need (should) not be any VHT. Fixes: 57fa5e85d53c ("mac80211: determine chandef from HE 6 GHz operation") Link: https://lore.kernel.org/r/20201013140156.535a2fc6192f.Id6e5e525a60ac18d245d86f4015f1b271fce6ee6@changeid Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f400240a556f..6adfcb9c06dc 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5464,6 +5464,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_assoc_request *req) { bool is_6ghz = req->bss->channel->band == NL80211_BAND_6GHZ; + bool is_5ghz = req->bss->channel->band == NL80211_BAND_5GHZ; struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss *bss = (void *)req->bss->priv; @@ -5616,7 +5617,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, if (vht_ie && vht_ie[1] >= sizeof(struct ieee80211_vht_cap)) memcpy(&assoc_data->ap_vht_cap, vht_ie + 2, sizeof(struct ieee80211_vht_cap)); - else if (!is_6ghz) + else if (is_5ghz) ifmgd->flags |= IEEE80211_STA_DISABLE_VHT | IEEE80211_STA_DISABLE_HE; rcu_read_unlock(); -- cgit v1.2.3 From dceababac29d1c53cbc1f7ddf6f688d2df01da87 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 22 Oct 2020 22:17:49 +0200 Subject: netfilter: nftables: fix netlink report logic in flowtable and genid The netlink report should be sent regardless the available listeners. Fixes: 84d7fce69388 ("netfilter: nf_tables: export rule-set generation ID") Fixes: 3b49e2e94e6e ("netfilter: nf_tables: add flow table netlink frontend") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 65cb8e3c13d9..9b70e136fb5d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7137,7 +7137,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, GFP_KERNEL); kfree(buf); - if (ctx->report && + if (!ctx->report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) return; @@ -7259,7 +7259,7 @@ static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb, audit_log_nfcfg("?:0;?:0", 0, net->nft.base_seq, AUDIT_NFT_OP_GEN_REGISTER, GFP_KERNEL); - if (nlmsg_report(nlh) && + if (!nlmsg_report(nlh) && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) return; -- cgit v1.2.3 From 46d6c5ae953cc0be38efd0e469284df7c4328cf8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 29 Oct 2020 03:56:06 +0100 Subject: netfilter: use actual socket sk rather than skb sk when routing harder If netfilter changes the packet mark when mangling, the packet is rerouted using the route_me_harder set of functions. Prior to this commit, there's one big difference between route_me_harder and the ordinary initial routing functions, described in the comment above __ip_queue_xmit(): /* Note: skb->sk can be different from sk, in case of tunnels */ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, That function goes on to correctly make use of sk->sk_bound_dev_if, rather than skb->sk->sk_bound_dev_if. And indeed the comment is true: a tunnel will receive a packet in ndo_start_xmit with an initial skb->sk. It will make some transformations to that packet, and then it will send the encapsulated packet out of a *new* socket. That new socket will basically always have a different sk_bound_dev_if (otherwise there'd be a routing loop). So for the purposes of routing the encapsulated packet, the routing information as it pertains to the socket should come from that socket's sk, rather than the packet's original skb->sk. For that reason __ip_queue_xmit() and related functions all do the right thing. One might argue that all tunnels should just call skb_orphan(skb) before transmitting the encapsulated packet into the new socket. But tunnels do *not* do this -- and this is wisely avoided in skb_scrub_packet() too -- because features like TSQ rely on skb->destructor() being called when that buffer space is truely available again. Calling skb_orphan(skb) too early would result in buffers filling up unnecessarily and accounting info being all wrong. Instead, additional routing must take into account the new sk, just as __ip_queue_xmit() notes. So, this commit addresses the problem by fishing the correct sk out of state->sk -- it's already set properly in the call to nf_hook() in __ip_local_out(), which receives the sk as part of its normal functionality. So we make sure to plumb state->sk through the various route_me_harder functions, and then make correct use of it following the example of __ip_queue_xmit(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason A. Donenfeld Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv4.h | 2 +- include/linux/netfilter_ipv6.h | 10 +++++----- net/ipv4/netfilter.c | 8 +++++--- net/ipv4/netfilter/iptable_mangle.c | 2 +- net/ipv4/netfilter/nf_reject_ipv4.c | 2 +- net/ipv6/netfilter.c | 6 +++--- net/ipv6/netfilter/ip6table_mangle.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 4 ++-- net/netfilter/nf_nat_proto.c | 4 ++-- net/netfilter/nf_synproxy_core.c | 2 +- net/netfilter/nft_chain_route.c | 4 ++-- net/netfilter/utils.c | 4 ++-- 12 files changed, 26 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 082e2c41b7ff..5b70ca868bb1 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -16,7 +16,7 @@ struct ip_rt_info { u_int32_t mark; }; -int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type); +int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned addr_type); struct nf_queue_entry; diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 9b67394471e1..48314ade1506 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -42,7 +42,7 @@ struct nf_ipv6_ops { #if IS_MODULE(CONFIG_IPV6) int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); - int (*route_me_harder)(struct net *net, struct sk_buff *skb); + int (*route_me_harder)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*dev_get_saddr)(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); @@ -143,9 +143,9 @@ static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk, #endif } -int ip6_route_me_harder(struct net *net, struct sk_buff *skb); +int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb); -static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb) +static inline int nf_ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb) { #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); @@ -153,9 +153,9 @@ static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb) if (!v6_ops) return -EHOSTUNREACH; - return v6_ops->route_me_harder(net, skb); + return v6_ops->route_me_harder(net, sk, skb); #elif IS_BUILTIN(CONFIG_IPV6) - return ip6_route_me_harder(net, skb); + return ip6_route_me_harder(net, sk, skb); #else return -EHOSTUNREACH; #endif diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index a058213b77a7..7c841037c533 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -17,17 +17,19 @@ #include /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_type) +int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi4 fl4 = {}; __be32 saddr = iph->saddr; - const struct sock *sk = skb_to_full_sk(skb); - __u8 flags = sk ? inet_sk_flowi_flags(sk) : 0; + __u8 flags; struct net_device *dev = skb_dst(skb)->dev; unsigned int hh_len; + sk = sk_to_full_sk(sk); + flags = sk ? inet_sk_flowi_flags(sk) : 0; + if (addr_type == RTN_UNSPEC) addr_type = inet_addr_type_dev_table(net, dev, saddr); if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index f703a717ab1d..833079589273 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -62,7 +62,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) iph->daddr != daddr || skb->mark != mark || iph->tos != tos) { - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 9dcfa4e461b6..93b07739807b 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -145,7 +145,7 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) ip4_dst_hoplimit(skb_dst(nskb))); nf_reject_ip_tcphdr_put(nskb, oldskb, oth); - if (ip_route_me_harder(net, nskb, RTN_UNSPEC)) + if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC)) goto free_nskb; niph = ip_hdr(nskb); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 6d0e942d082d..ab9a279dd6d4 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -20,10 +20,10 @@ #include #include "../bridge/br_private.h" -int ip6_route_me_harder(struct net *net, struct sk_buff *skb) +int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff *skb) { const struct ipv6hdr *iph = ipv6_hdr(skb); - struct sock *sk = sk_to_full_sk(skb->sk); + struct sock *sk = sk_to_full_sk(sk_partial); unsigned int hh_len; struct dst_entry *dst; int strict = (ipv6_addr_type(&iph->daddr) & @@ -84,7 +84,7 @@ static int nf_ip6_reroute(struct sk_buff *skb, if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || skb->mark != rt_info->mark) - return ip6_route_me_harder(entry->state.net, skb); + return ip6_route_me_harder(entry->state.net, entry->state.sk, skb); } return 0; } diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 1a2748611e00..cee74803d7a1 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -57,7 +57,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { - err = ip6_route_me_harder(state->net, skb); + err = ip6_route_me_harder(state->net, state->sk, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index cc3c275934f4..c0b8215ab3d4 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -742,12 +742,12 @@ static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af, struct dst_entry *dst = skb_dst(skb); if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) && - ip6_route_me_harder(ipvs->net, skb) != 0) + ip6_route_me_harder(ipvs->net, skb->sk, skb) != 0) return 1; } else #endif if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) && - ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0) + ip_route_me_harder(ipvs->net, skb->sk, skb, RTN_LOCAL) != 0) return 1; return 0; diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 59151dc07fdc..e87b6bd6b3cd 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -715,7 +715,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, if (ct->tuplehash[dir].tuple.dst.u3.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } @@ -953,7 +953,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &ct->tuplehash[!dir].tuple.src.u3)) { - err = nf_ip6_route_me_harder(state->net, skb); + err = nf_ip6_route_me_harder(state->net, state->sk, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 9cca35d22927..d7d34a62d3bf 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -446,7 +446,7 @@ synproxy_send_tcp(struct net *net, skb_dst_set_noref(nskb, skb_dst(skb)); nskb->protocol = htons(ETH_P_IP); - if (ip_route_me_harder(net, nskb, RTN_UNSPEC)) + if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC)) goto free_nskb; if (nfct) { diff --git a/net/netfilter/nft_chain_route.c b/net/netfilter/nft_chain_route.c index 8826bbe71136..edd02cda57fc 100644 --- a/net/netfilter/nft_chain_route.c +++ b/net/netfilter/nft_chain_route.c @@ -42,7 +42,7 @@ static unsigned int nf_route_table_hook4(void *priv, iph->daddr != daddr || skb->mark != mark || iph->tos != tos) { - err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } @@ -92,7 +92,7 @@ static unsigned int nf_route_table_hook6(void *priv, skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u32 *)ipv6_hdr(skb)))) { - err = nf_ip6_route_me_harder(state->net, skb); + err = nf_ip6_route_me_harder(state->net, state->sk, skb); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index cedf47ab3c6f..2182d361e273 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -191,8 +191,8 @@ static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry skb->mark == rt_info->mark && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(entry->state.net, skb, - RTN_UNSPEC); + return ip_route_me_harder(entry->state.net, entry->state.sk, + skb, RTN_UNSPEC); } #endif return 0; -- cgit v1.2.3 From c0391b6ab810381df632677a1dcbbbbd63d05b6d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 29 Oct 2020 13:50:03 +0100 Subject: netfilter: nf_tables: missing validation from the abort path If userspace does not include the trailing end of batch message, then nfnetlink aborts the transaction. This allows to check that ruleset updates trigger no errors. After this patch, invoking this command from the prerouting chain: # nft -c add rule x y fib saddr . oif type local fails since oif is not supported there. This patch fixes the lack of rule validation from the abort/check path to catch configuration errors such as the one above. Fixes: a654de8fdc18 ("netfilter: nf_tables: fix chain dependency validation") Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 9 ++++++++- net/netfilter/nf_tables_api.c | 15 ++++++++++----- net/netfilter/nfnetlink.c | 22 ++++++++++++++++++---- 3 files changed, 36 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 89016d08f6a2..f6267e2883f2 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -24,6 +24,12 @@ struct nfnl_callback { const u_int16_t attr_count; /* number of nlattr's */ }; +enum nfnl_abort_action { + NFNL_ABORT_NONE = 0, + NFNL_ABORT_AUTOLOAD, + NFNL_ABORT_VALIDATE, +}; + struct nfnetlink_subsystem { const char *name; __u8 subsys_id; /* nfnetlink subsystem ID */ @@ -31,7 +37,8 @@ struct nfnetlink_subsystem { const struct nfnl_callback *cb; /* callback for individual types */ struct module *owner; int (*commit)(struct net *net, struct sk_buff *skb); - int (*abort)(struct net *net, struct sk_buff *skb, bool autoload); + int (*abort)(struct net *net, struct sk_buff *skb, + enum nfnl_abort_action action); void (*cleanup)(struct net *net); bool (*valid_genid)(struct net *net, u32 genid); }; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9b70e136fb5d..0f58e98542be 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8053,12 +8053,16 @@ static void nf_tables_abort_release(struct nft_trans *trans) kfree(trans); } -static int __nf_tables_abort(struct net *net, bool autoload) +static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) { struct nft_trans *trans, *next; struct nft_trans_elem *te; struct nft_hook *hook; + if (action == NFNL_ABORT_VALIDATE && + nf_tables_validate(net) < 0) + return -EAGAIN; + list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list, list) { switch (trans->msg_type) { @@ -8190,7 +8194,7 @@ static int __nf_tables_abort(struct net *net, bool autoload) nf_tables_abort_release(trans); } - if (autoload) + if (action == NFNL_ABORT_AUTOLOAD) nf_tables_module_autoload(net); else nf_tables_module_autoload_cleanup(net); @@ -8203,9 +8207,10 @@ static void nf_tables_cleanup(struct net *net) nft_validate_state_update(net, NFT_VALIDATE_SKIP); } -static int nf_tables_abort(struct net *net, struct sk_buff *skb, bool autoload) +static int nf_tables_abort(struct net *net, struct sk_buff *skb, + enum nfnl_abort_action action) { - int ret = __nf_tables_abort(net, autoload); + int ret = __nf_tables_abort(net, action); mutex_unlock(&net->nft.commit_mutex); @@ -8836,7 +8841,7 @@ static void __net_exit nf_tables_exit_net(struct net *net) { mutex_lock(&net->nft.commit_mutex); if (!list_empty(&net->nft.commit_list)) - __nf_tables_abort(net, false); + __nf_tables_abort(net, NFNL_ABORT_NONE); __nft_release_tables(net); mutex_unlock(&net->nft.commit_mutex); WARN_ON_ONCE(!list_empty(&net->nft.tables)); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 2daa1f6ae344..d3df66a39b5e 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -333,7 +333,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, return netlink_ack(skb, nlh, -EINVAL, NULL); replay: status = 0; - +replay_abort: skb = netlink_skb_clone(oskb, GFP_KERNEL); if (!skb) return netlink_ack(oskb, nlh, -ENOMEM, NULL); @@ -499,7 +499,7 @@ ack: } done: if (status & NFNL_BATCH_REPLAY) { - ss->abort(net, oskb, true); + ss->abort(net, oskb, NFNL_ABORT_AUTOLOAD); nfnl_err_reset(&err_list); kfree_skb(skb); module_put(ss->owner); @@ -510,11 +510,25 @@ done: status |= NFNL_BATCH_REPLAY; goto done; } else if (err) { - ss->abort(net, oskb, false); + ss->abort(net, oskb, NFNL_ABORT_NONE); netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL); } } else { - ss->abort(net, oskb, false); + enum nfnl_abort_action abort_action; + + if (status & NFNL_BATCH_FAILURE) + abort_action = NFNL_ABORT_NONE; + else + abort_action = NFNL_ABORT_VALIDATE; + + err = ss->abort(net, oskb, abort_action); + if (err == -EAGAIN) { + nfnl_err_reset(&err_list); + kfree_skb(skb); + module_put(ss->owner); + status |= NFNL_BATCH_FAILURE; + goto replay_abort; + } } if (ss->cleanup) ss->cleanup(net); -- cgit v1.2.3 From 9e7c5b396e98eed859d3dd1ab235912a296faab5 Mon Sep 17 00:00:00 2001 From: Alexander Ovechkin Date: Thu, 29 Oct 2020 20:10:12 +0300 Subject: ip6_tunnel: set inner ipproto before ip6_tnl_encap ip6_tnl_encap assigns to proto transport protocol which encapsulates inner packet, but we must pass to set_inner_ipproto protocol of that inner packet. Calling set_inner_ipproto after ip6_tnl_encap might break gso. For example, in case of encapsulating ipv6 packet in fou6 packet, inner_ipproto would be set to IPPROTO_UDP instead of IPPROTO_IPV6. This would lead to incorrect calling sequence of gso functions: ipv6_gso_segment -> udp6_ufo_fragment -> skb_udp_tunnel_segment -> udp6_ufo_fragment instead of: ipv6_gso_segment -> udp6_ufo_fragment -> skb_udp_tunnel_segment -> ip6ip6_gso_segment Fixes: 6c11fbf97e69 ("ip6_tunnel: add MPLS transmit support") Signed-off-by: Alexander Ovechkin Link: https://lore.kernel.org/r/20201029171012.20904-1-ovov@yandex-team.ru Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_tunnel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a0217e5bf3bc..648db3fe508f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1271,6 +1271,8 @@ route_lookup: if (max_headroom > dev->needed_headroom) dev->needed_headroom = max_headroom; + skb_set_inner_ipproto(skb, proto); + err = ip6_tnl_encap(skb, t, &proto, fl6); if (err) return err; @@ -1280,8 +1282,6 @@ route_lookup: ipv6_push_frag_opts(skb, &opt.ops, &proto); } - skb_set_inner_ipproto(skb, proto); - skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); -- cgit v1.2.3 From 7d10e62c2ff8e084c136c94d32d9a94de4d31248 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 29 Oct 2020 16:39:46 +0100 Subject: netfilter: ipset: Update byte and packet counters regardless of whether they match In ip_set_match_extensions(), for sets with counters, we take care of updating counters themselves by calling ip_set_update_counter(), and of checking if the given comparison and values match, by calling ip_set_match_counter() if needed. However, if a given comparison on counters doesn't match the configured values, that doesn't mean the set entry itself isn't matching. This fix restores the behaviour we had before commit 4750005a85f7 ("netfilter: ipset: Fix "don't update counters" mode when counters used at the matching"), without reintroducing the issue fixed there: back then, mtype_data_match() first updated counters in any case, and then took care of matching on counters. Now, if the IPSET_FLAG_SKIP_COUNTER_UPDATE flag is set, ip_set_update_counter() will anyway skip counter updates if desired. The issue observed is illustrated by this reproducer: ipset create c hash:ip counters ipset add c 192.0.2.1 iptables -I INPUT -m set --match-set c src --bytes-gt 800 -j DROP if we now send packets from 192.0.2.1, bytes and packets counters for the entry as shown by 'ipset list' are always zero, and, no matter how many bytes we send, the rule will never match, because counters themselves are not updated. Reported-by: Mithil Mhatre Fixes: 4750005a85f7 ("netfilter: ipset: Fix "don't update counters" mode when counters used at the matching") Signed-off-by: Stefano Brivio Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 6f35832f0de3..7cff6e5e7445 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -637,13 +637,14 @@ ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext, if (SET_WITH_COUNTER(set)) { struct ip_set_counter *counter = ext_counter(data, set); + ip_set_update_counter(counter, ext, flags); + if (flags & IPSET_FLAG_MATCH_COUNTERS && !(ip_set_match_counter(ip_set_get_packets(counter), mext->packets, mext->packets_op) && ip_set_match_counter(ip_set_get_bytes(counter), mext->bytes, mext->bytes_op))) return false; - ip_set_update_counter(counter, ext, flags); } if (SET_WITH_SKBINFO(set)) ip_set_get_skbinfo(ext_skbinfo(data, set), -- cgit v1.2.3 From 2f71e00619dcde3d8a98ba3e7f52e98282504b7d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 27 Oct 2020 11:49:25 +0000 Subject: net: atm: fix update of position index in lec_seq_next The position index in leq_seq_next is not updated when the next entry is fetched an no more entries are available. This causes seq_file to report the following error: "seq_file: buggy .next function lec_seq_next [lec] did not update position index" Fix this by always updating the position index. [ Note: this is an ancient 2002 bug, the sha is from the tglx/history repo ] Fixes 4aea2cbff417 ("[ATM]: Move lan seq_file ops to lec.c [1/3]") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20201027114925.21843-1-colin.king@canonical.com Signed-off-by: Jakub Kicinski --- net/atm/lec.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/atm/lec.c b/net/atm/lec.c index dbabb65d8b67..7226c784dbe0 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -954,9 +954,8 @@ static void *lec_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct lec_state *state = seq->private; - v = lec_get_idx(state, 1); - *pos += !!PTR_ERR(v); - return v; + ++*pos; + return lec_get_idx(state, 1); } static int lec_seq_show(struct seq_file *seq, void *v) -- cgit v1.2.3 From 2efdaaaf883a143061296467913c01aa1ff4b3ce Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 27 Oct 2020 20:33:13 +0800 Subject: IPv6: reply ICMP error if the first fragment don't include all headers Based on RFC 8200, Section 4.5 Fragment Header: - If the first fragment does not include all headers through an Upper-Layer header, then that fragment should be discarded and an ICMP Parameter Problem, Code 3, message should be sent to the source of the fragment, with the Pointer field set to zero. Checking each packet header in IPv6 fast path will have performance impact, so I put the checking in ipv6_frag_rcv(). As the packet may be any kind of L4 protocol, I only checked some common protocols' header length and handle others by (offset + 1) > skb->len. Also use !(frag_off & htons(IP6_OFFSET)) to catch atomic fragments (fragmented packet with only one fragment). When send ICMP error message, if the 1st truncated fragment is ICMP message, icmp6_send() will break as is_ineligible() return true. So I added a check in is_ineligible() to let fragment packet with nexthdr ICMP but no ICMP header return false. Signed-off-by: Hangbin Liu Signed-off-by: Jakub Kicinski --- net/ipv6/icmp.c | 8 +++++++- net/ipv6/reassembly.c | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ec448b71bf9a..8956144ea65e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -158,7 +158,13 @@ static bool is_ineligible(const struct sk_buff *skb) tp = skb_header_pointer(skb, ptr+offsetof(struct icmp6hdr, icmp6_type), sizeof(_type), &_type); - if (!tp || !(*tp & ICMPV6_INFOMSG_MASK)) + + /* Based on RFC 8200, Section 4.5 Fragment Header, return + * false if this is a fragment packet with no icmp header info. + */ + if (!tp && frag_off != 0) + return false; + else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK)) return true; } return false; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 1f5d4d196dcc..c8cf1bbad74a 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -42,6 +42,8 @@ #include #include #include +#include +#include #include #include @@ -322,7 +324,9 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_queue *fq; const struct ipv6hdr *hdr = ipv6_hdr(skb); struct net *net = dev_net(skb_dst(skb)->dev); - int iif; + __be16 frag_off; + int iif, offset; + u8 nexthdr; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) goto fail_hdr; @@ -351,6 +355,33 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } + /* RFC 8200, Section 4.5 Fragment Header: + * If the first fragment does not include all headers through an + * Upper-Layer header, then that fragment should be discarded and + * an ICMP Parameter Problem, Code 3, message should be sent to + * the source of the fragment, with the Pointer field set to zero. + */ + nexthdr = hdr->nexthdr; + offset = ipv6_skip_exthdr(skb, skb_transport_offset(skb), &nexthdr, &frag_off); + if (offset >= 0) { + /* Check some common protocols' header */ + if (nexthdr == IPPROTO_TCP) + offset += sizeof(struct tcphdr); + else if (nexthdr == IPPROTO_UDP) + offset += sizeof(struct udphdr); + else if (nexthdr == IPPROTO_ICMPV6) + offset += sizeof(struct icmp6hdr); + else + offset += 1; + + if (!(frag_off & htons(IP6_OFFSET)) && offset > skb->len) { + __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), + IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0); + return -1; + } + } + iif = skb->dev ? skb->dev->ifindex : 0; fq = fq_find(net, fhdr->identification, hdr, iif); if (fq) { -- cgit v1.2.3 From 20149e9eb68c003eaa09e7c9a49023df40779552 Mon Sep 17 00:00:00 2001 From: wenxu Date: Fri, 30 Oct 2020 11:32:08 +0800 Subject: ip_tunnel: fix over-mtu packet send fail without TUNNEL_DONT_FRAGMENT flags The tunnel device such as vxlan, bareudp and geneve in the lwt mode set the outer df only based TUNNEL_DONT_FRAGMENT. And this was also the behavior for gre device before switching to use ip_md_tunnel_xmit in commit 962924fa2b7a ("ip_gre: Refactor collect metatdata mode tunnel xmit to ip_md_tunnel_xmit") When the ip_gre in lwt mode xmit with ip_md_tunnel_xmi changed the rule and make the discrepancy between handling of DF by different tunnels. So in the ip_md_tunnel_xmit should follow the same rule like other tunnels. Fixes: cfc7381b3002 ("ip_tunnel: add collect_md mode to IPIP tunnel") Signed-off-by: wenxu Link: https://lore.kernel.org/r/1604028728-31100-1-git-send-email-wenxu@ucloud.cn Signed-off-by: Jakub Kicinski --- net/ipv4/ip_tunnel.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 8b04d1dcfec4..ee65c9225178 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -608,9 +608,6 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ttl = ip4_dst_hoplimit(&rt->dst); } - if (!df && skb->protocol == htons(ETH_P_IP)) - df = inner_iph->frag_off & htons(IP_DF); - headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; if (headroom > dev->needed_headroom) dev->needed_headroom = headroom; -- cgit v1.2.3 From b6df8c81412190fbd5eaa3cec7f642142d9c16cd Mon Sep 17 00:00:00 2001 From: Petr Malat Date: Fri, 30 Oct 2020 14:26:33 +0100 Subject: sctp: Fix COMM_LOST/CANT_STR_ASSOC err reporting on big-endian platforms Commit 978aa0474115 ("sctp: fix some type cast warnings introduced since very beginning")' broke err reading from sctp_arg, because it reads the value as 32-bit integer, although the value is stored as 16-bit integer. Later this value is passed to the userspace in 16-bit variable, thus the user always gets 0 on big-endian platforms. Fix it by reading the __u16 field of sctp_arg union, as reading err field would produce a sparse warning. Fixes: 978aa0474115 ("sctp: fix some type cast warnings introduced since very beginning") Signed-off-by: Petr Malat Acked-by: Marcelo Ricardo Leitner Link: https://lore.kernel.org/r/20201030132633.7045-1-oss@malat.biz Signed-off-by: Jakub Kicinski --- net/sctp/sm_sideeffect.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index aa821e71f05e..813d30767204 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1601,12 +1601,12 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type, break; case SCTP_CMD_INIT_FAILED: - sctp_cmd_init_failed(commands, asoc, cmd->obj.u32); + sctp_cmd_init_failed(commands, asoc, cmd->obj.u16); break; case SCTP_CMD_ASSOC_FAILED: sctp_cmd_assoc_failed(commands, asoc, event_type, - subtype, chunk, cmd->obj.u32); + subtype, chunk, cmd->obj.u16); break; case SCTP_CMD_INIT_COUNTER_INC: -- cgit v1.2.3 From e16b874ee87aa70cd0a7145346ff5f41349b514c Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 2 Nov 2020 10:09:49 +0100 Subject: mptcp: token: fix unititialized variable gcc complains about use of uninitialized 'num'. Fix it by doing the first assignment of 'num' when the variable is declared. Fixes: 96d890daad05 ("mptcp: add msk interations helper") Signed-off-by: Davide Caratti Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/49e20da5d467a73414d4294a8bd35e2cb1befd49.1604308087.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- net/mptcp/token.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/token.c b/net/mptcp/token.c index 8b47c4bb1c6b..feb4b9ffd462 100644 --- a/net/mptcp/token.c +++ b/net/mptcp/token.c @@ -291,7 +291,7 @@ struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot, { struct mptcp_sock *ret = NULL; struct hlist_nulls_node *pos; - int slot, num; + int slot, num = 0; for (slot = *s_slot; slot <= token_mask; *s_num = 0, slot++) { struct token_bucket *bucket = &token_hash[slot]; -- cgit v1.2.3 From 3accbfdc36130282f5ae9e6eecfdf820169fedce Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Tue, 14 Jul 2020 14:44:50 +0800 Subject: can: proc: can_remove_proc(): silence remove_proc_entry warning If can_init_proc() fail to create /proc/net/can directory, can_remove_proc() will trigger a warning: WARNING: CPU: 6 PID: 7133 at fs/proc/generic.c:672 remove_proc_entry+0x17b0 Kernel panic - not syncing: panic_on_warn set ... Fix to return early from can_remove_proc() if can proc_dir does not exists. Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1594709090-3203-1-git-send-email-zhangchangzhong@huawei.com Fixes: 8e8cda6d737d ("can: initial support for network namespaces") Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/proc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/can/proc.c b/net/can/proc.c index 550928b8b8a2..5ea8695f507e 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -462,6 +462,9 @@ void can_init_proc(struct net *net) */ void can_remove_proc(struct net *net) { + if (!net->can.proc_dir) + return; + if (net->can.pde_stats) remove_proc_entry(CAN_PROC_STATS, net->can.proc_dir); @@ -486,6 +489,5 @@ void can_remove_proc(struct net *net) if (net->can.pde_rcvlist_sff) remove_proc_entry(CAN_PROC_RCVLIST_SFF, net->can.proc_dir); - if (net->can.proc_dir) - remove_proc_entry("can", net->proc_net); + remove_proc_entry("can", net->proc_net); } -- cgit v1.2.3 From 08c487d8d807535f509ed80c6a10ad90e6872139 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Mon, 7 Sep 2020 14:31:48 +0800 Subject: can: j1939: j1939_sk_bind(): return failure if netdev is down When a netdev down event occurs after a successful call to j1939_sk_bind(), j1939_netdev_notify() can handle it correctly. But if the netdev already in down state before calling j1939_sk_bind(), j1939_sk_release() will stay in wait_event_interruptible() blocked forever. Because in this case, j1939_netdev_notify() won't be called and j1939_tp_txtimer() won't call j1939_session_cancel() or other function to clear session for ENETDOWN error, this lead to mismatch of j1939_session_get/put() and jsk->skb_pending will never decrease to zero. To reproduce it use following commands: 1. ip link add dev vcan0 type vcan 2. j1939acd -r 100,80-120 1122334455667788 vcan0 3. presses ctrl-c and thread will be blocked forever This patch adds check for ndev->flags in j1939_sk_bind() to avoid this kind of situation and return with -ENETDOWN. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1599460308-18770-1-git-send-email-zhangchangzhong@huawei.com Acked-by: Oleksij Rempel Signed-off-by: Marc Kleine-Budde --- net/can/j1939/socket.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 1be4c898b2fa..f23966526a88 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -475,6 +475,12 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len) goto out_release_sock; } + if (!(ndev->flags & IFF_UP)) { + dev_put(ndev); + ret = -ENETDOWN; + goto out_release_sock; + } + priv = j1939_netdev_start(ndev); dev_put(ndev); if (IS_ERR(priv)) { -- cgit v1.2.3 From 5a7de2408fa59be2a92b3ed4a5614fbb6942893d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 13 Oct 2020 16:13:41 +0200 Subject: can: isotp: Explain PDU in CAN_ISOTP help text The help text for the CAN_ISOTP config symbol uses the acronym "PDU". However, this acronym is not explained here, nor in Documentation/networking/can.rst. Expand the acronym to make it easier for users to decide if they need to enable the CAN_ISOTP option or not. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20201013141341.28487-1-geert+renesas@glider.be Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/can/Kconfig b/net/can/Kconfig index 224e5e0283a9..7c9958df91d3 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -62,8 +62,9 @@ config CAN_ISOTP communication between CAN nodes via two defined CAN Identifiers. As CAN frames can only transport a small amount of data bytes (max. 8 bytes for 'classic' CAN and max. 64 bytes for CAN FD) this - segmentation is needed to transport longer PDUs as needed e.g. for - vehicle diagnosis (UDS, ISO 14229) or IP-over-CAN traffic. + segmentation is needed to transport longer Protocol Data Units (PDU) + as needed e.g. for vehicle diagnosis (UDS, ISO 14229) or IP-over-CAN + traffic. This protocol driver implements data transfers according to ISO 15765-2:2016 for 'classic' CAN and CAN FD frame types. If you want to perform automotive vehicle diagnostic services (UDS), -- cgit v1.2.3 From 78656ea235191c8cf8475f8335860ca4192243a4 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 19 Oct 2020 14:02:29 +0200 Subject: can: isotp: isotp_rcv_cf(): enable RX timeout handling in listen-only mode As reported by Thomas Wagner: https://github.com/hartkopp/can-isotp/issues/34 the timeout handling for data frames is not enabled when the isotp socket is used in listen-only mode (sockopt CAN_ISOTP_LISTEN_MODE). This mode is enabled by the isotpsniffer application which therefore became inconsistend with the strict rx timeout rules when running the isotp protocol in the operational mode. This patch fixes this inconsistency by moving the return condition for the listen-only mode behind the timeout handling code. Reported-by: Thomas Wagner Signed-off-by: Oliver Hartkopp Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") Link: https://github.com/hartkopp/can-isotp/issues/34 Link: https://lore.kernel.org/r/20201019120229.89326-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/can/isotp.c b/net/can/isotp.c index 4c2062875893..a79287ef86da 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -569,10 +569,6 @@ static int isotp_rcv_cf(struct sock *sk, struct canfd_frame *cf, int ae, return 0; } - /* no creation of flow control frames */ - if (so->opt.flags & CAN_ISOTP_LISTEN_MODE) - return 0; - /* perform blocksize handling, if enabled */ if (!so->rxfc.bs || ++so->rx.bs < so->rxfc.bs) { /* start rx timeout watchdog */ @@ -581,6 +577,10 @@ static int isotp_rcv_cf(struct sock *sk, struct canfd_frame *cf, int ae, return 0; } + /* no creation of flow control frames */ + if (so->opt.flags & CAN_ISOTP_LISTEN_MODE) + return 0; + /* we reached the specified blocksize so->rxfc.bs */ isotp_send_fc(sk, ae, ISOTP_FC_CTS); return 0; -- cgit v1.2.3 From c3ddac4b0c9a280d4d5b670b4d39c50fee88579e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 20 Oct 2020 16:42:03 +0100 Subject: can: isotp: padlen(): make const array static, makes object smaller Don't populate the const array plen on the stack but instead it static. Makes the object code smaller by 926 bytes. Before: text data bss dec hex filename 26531 1943 64 28538 6f7a net/can/isotp.o After: text data bss dec hex filename 25509 2039 64 27612 6bdc net/can/isotp.o (gcc version 10.2.0) Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20201020154203.54711-1-colin.king@canonical.com Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/can/isotp.c b/net/can/isotp.c index a79287ef86da..d78ab13bd8be 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -252,14 +252,16 @@ static void isotp_rcv_skb(struct sk_buff *skb, struct sock *sk) static u8 padlen(u8 datalen) { - const u8 plen[] = {8, 8, 8, 8, 8, 8, 8, 8, 8, /* 0 - 8 */ - 12, 12, 12, 12, /* 9 - 12 */ - 16, 16, 16, 16, /* 13 - 16 */ - 20, 20, 20, 20, /* 17 - 20 */ - 24, 24, 24, 24, /* 21 - 24 */ - 32, 32, 32, 32, 32, 32, 32, 32, /* 25 - 32 */ - 48, 48, 48, 48, 48, 48, 48, 48, /* 33 - 40 */ - 48, 48, 48, 48, 48, 48, 48, 48}; /* 41 - 48 */ + static const u8 plen[] = { + 8, 8, 8, 8, 8, 8, 8, 8, 8, /* 0 - 8 */ + 12, 12, 12, 12, /* 9 - 12 */ + 16, 16, 16, 16, /* 13 - 16 */ + 20, 20, 20, 20, /* 17 - 20 */ + 24, 24, 24, 24, /* 21 - 24 */ + 32, 32, 32, 32, 32, 32, 32, 32, /* 25 - 32 */ + 48, 48, 48, 48, 48, 48, 48, 48, /* 33 - 40 */ + 48, 48, 48, 48, 48, 48, 48, 48 /* 41 - 48 */ + }; if (datalen > 48) return 64; -- cgit v1.2.3 From fea07a487c6dd422dc8837237c9d2bc7c33119af Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Tue, 3 Nov 2020 09:25:49 +0100 Subject: net: openvswitch: silence suspicious RCU usage warning Silence suspicious RCU usage warning in ovs_flow_tbl_masks_cache_resize() by replacing rcu_dereference() with rcu_dereference_ovsl(). In addition, when creating a new datapath, make sure it's configured under the ovs_lock. Fixes: 9bf24f594c6a ("net: openvswitch: make masks cache size configurable") Reported-by: syzbot+9a8f8bfcc56e8578016c@syzkaller.appspotmail.com Signed-off-by: Eelco Chaudron Link: https://lore.kernel.org/r/160439190002.56943.1418882726496275961.stgit@ebuild Signed-off-by: Jakub Kicinski --- net/openvswitch/datapath.c | 14 +++++++------- net/openvswitch/flow_table.c | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 832f898edb6a..9d6ef6cb9b26 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1703,13 +1703,13 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.port_no = OVSP_LOCAL; parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; - err = ovs_dp_change(dp, a); - if (err) - goto err_destroy_meters; - /* So far only local changes have been made, now need the lock. */ ovs_lock(); + err = ovs_dp_change(dp, a); + if (err) + goto err_unlock_and_destroy_meters; + vport = new_vport(&parms); if (IS_ERR(vport)) { err = PTR_ERR(vport); @@ -1725,8 +1725,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_reset_user_features(skb, info); } - ovs_unlock(); - goto err_destroy_meters; + goto err_unlock_and_destroy_meters; } err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, @@ -1741,7 +1740,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -err_destroy_meters: +err_unlock_and_destroy_meters: + ovs_unlock(); ovs_meters_exit(dp); err_destroy_ports: kfree(dp->ports); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index f3486a37361a..c89c8da99f1a 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -390,7 +390,7 @@ static struct mask_cache *tbl_mask_cache_alloc(u32 size) } int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size) { - struct mask_cache *mc = rcu_dereference(table->mask_cache); + struct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache); struct mask_cache *new; if (size == mc->cache_size) -- cgit v1.2.3